Ajouts à projet phylo

Contrastive learning
Ajout détails VGAE
2026-05-11 14:04:39 +02:00 · 2026-05-11 13:51:45 +02:00 · 2026-05-11 13:45:28 +02:00 · 2026-05-07 18:09:08 +02:00 · 2026-05-07 16:01:57 +02:00 · 2026-05-07 16:01:47 +02:00
71 changed files with 62744 additions and 46 deletions
--- a/.gitignore
+++ b/.gitignore
@ -311,3 +311,4 @@ TSWLatexianTemp*
 /.quarto/
 public/
 /.luarc.json
--- a/.woodpecker.yml
+++ b/.woodpecker.yml
@ -1,3 +1,8 @@
 clone:
  git:
    image: woodpeckerci/plugin-git
    branch: develop
 steps:
  render-site:
    image: ghcr.io/quarto-dev/quarto:1.7.22
@ -7,7 +12,7 @@ steps:
    when:
      event: [push, pull_request, cron, manual]
      branch:
-        - main
+        - develop
  # Push le contenu du dossier public sur le dépôt `pages` de git.polarolouis.fr
  # On utilise l'image alpine/git pour avoir git et ssh
@ -16,23 +21,21 @@ steps:
    commands:
      - git config --global user.name "Woodpecker CI"
      - git config --global user.email "git@polarolouis.fr"
-      - git clone -b pages "https://$${ACCESS_TOKEN}@git.polarolouis.fr/polarolouis/these-recap-hebdo.git" $DESTINATION
+      - git clone -b main "https://$${ACCESS_TOKEN}@git.polarolouis.fr/polarolouis/these-recap-hebdo.git" $DESTINATION
      - rm -rf $DESTINATION/* && echo "Cleaned $DESTINATION" || echo "Failed to clean $DESTINATION"
      - cp -ar $CI_WORKSPACE/public/* $DESTINATION/
      - cd $DESTINATION
      - ls -la
      - git add --all
-      - git commit -m "Deploy site ${CI_BUILD_CREATED} [CI SKIP]" || echo "Nothing to commit"
+      - git commit -m "Deploy site $CI_BUILD_CREATED [CI SKIP]" || echo "Nothing to commit"
      - git push && echo "Pushed to $DESTINATION" || echo "Failed to push to $DESTINATION"
    environment:
      ACCESS_TOKEN:
        from_secret: access_token
      DESTINATION: pages
    when:
-      event:
+      event: [push, pull_request, cron, manual]
        - push
        - pull_request
      branch:
-        - main
+        - develop
    depends_on:
      - render-site
--- a/README.md
+++ b/README.md
@ -1,2 +1,3 @@
 # these-recap-hebdo
 ![Build status](https://woodpecker.polarolouis.fr/api/badges/4/status.svg)
--- a/_freeze/site_libs/clipboard/clipboard.min.js
+++ b/_freeze/site_libs/clipboard/clipboard.min.js
--- a/_freeze/site_libs/quarto-listing/list.min.js
+++ b/_freeze/site_libs/quarto-listing/list.min.js
--- a/_freeze/site_libs/quarto-listing/quarto-listing.js
+++ b/_freeze/site_libs/quarto-listing/quarto-listing.js
@ -0,0 +1,243 @@
 // Name of the attribute that carries an image's source until it is rendered.
 const kProgressiveAttr = "data-src";
 // Set to true by the DOMContentLoaded handler once the category elements are wired.
 let categoriesLoaded = false;
 /**
  * Global hook for selecting a category: activates it and records it in the
  * URL hash. Calls made before the categories are loaded are ignored.
  *
  * @param category - Category to activate.
  */
 window.quartoListingCategory = (category) => {
  if (!categoriesLoaded) {
    return;
  }
  activateCategory(category);
  setCategoryHash(category);
 };
 /**
  * Callback stored on window under "quarto-listing-loaded".
  * First restores the state found in the URL hash (active category and the
  * page of each listing), then wires up every listing registered in
  * window["quarto-listings"].
  */
 window["quarto-listing-loaded"] = () => {
  // Restore the state carried by the URL hash, if there is one
  const hashState = getHash();
  if (hashState) {
    // A category recorded in the hash is activated first
    if (hashState.category) {
      activateCategory(hashState.category);
    }
    // Then each listing jumps to the page recorded for it, if any
    Object.keys(window["quarto-listings"]).forEach((id) => {
      const pageNumber = hashState[getListingPageKey(id)];
      if (pageNumber) {
        showPage(id, pageNumber);
      }
    });
  }
  Object.keys(window["quarto-listings"]).forEach((id) => {
    const listing = window["quarto-listings"][id];
    // Hook the pagination links of this listing
    refreshPaginationHandlers(id);
    // Load the images of the items that are already visible
    renderVisibleProgressiveImages(listing);
    // Each update may create new pagination elements and reveal new items,
    // so the same work is repeated whenever the list changes.
    listing.on("updated", () => {
      renderVisibleProgressiveImages(listing);
      setTimeout(() => refreshPaginationHandlers(id));
      // Keep the "no matching items" message in sync
      toggleNoMatchingMessage(listing);
    });
  });
 };
 // Once the DOM is ready, make the category elements and the category title
 // clickable, then flag the categories as loaded.
 window.document.addEventListener("DOMContentLoaded", () => {
  const doc = window.document;
  // Clicking a category activates it and records it in the URL hash
  doc
    .querySelectorAll(".quarto-listing-category .category")
    .forEach((el) => {
      const name = el.getAttribute("data-category");
      el.onclick = () => {
        activateCategory(name);
        setCategoryHash(name);
      };
    });
  // Clicking the title clears the category; the title is selected by class
  // name, so every matching element is handled.
  doc.querySelectorAll(".quarto-listing-category-title").forEach((titleEl) => {
    titleEl.onclick = () => {
      activateCategory("");
      setCategoryHash("");
    };
  });
  categoriesLoaded = true;
 });
 /**
  * Shows the ".listing-no-matching" element of a list when the list has no
  * visible item, and hides it (class "d-none") otherwise. Does nothing when
  * the list container holds no such element.
  *
  * @param list - Object exposing `listContainer.id` and `visibleItems`.
  */
 function toggleNoMatchingMessage(list) {
  const messageEl = window.document.querySelector(
    `#${list.listContainer.id} .listing-no-matching`
  );
  if (!messageEl) {
    return;
  }
  const classes = messageEl.classList;
  if (list.visibleItems.length === 0) {
    classes.remove("d-none");
    return;
  }
  if (!classes.contains("d-none")) {
    classes.add("d-none");
  }
 }
 /**
  * Records the given category in the URL hash.
  * The hash is rebuilt from the category alone.
  */
 function setCategoryHash(category) {
  const state = { category: category };
  setHash(state);
 }
 /**
  * Records the current page of a listing in the URL hash, keeping whatever
  * entries the hash already holds.
  */
 function setPageHash(listingId, page) {
  const state = getHash() || {};
  const pageKey = getListingPageKey(listingId);
  state[pageKey] = page;
  setHash(state);
 }
 /**
  * Builds the hash entry name under which the page of a listing is stored:
  * the listing id followed by "-page".
  */
 function getListingPageKey(listingId) {
  return "".concat(listingId, "-page");
 }
 /**
  * Attaches a click handler to every enabled pagination link of a listing.
  * A click records the target page in the URL hash, shows that page, and
  * returns false.
  *
  * @param listingId - Id of the element that contains the listing.
  */
 function refreshPaginationHandlers(listingId) {
  const container = window.document.getElementById(listingId);
  const links = container.querySelectorAll(
    ".pagination li.page-item:not(.disabled) .page.page-link"
  );
  links.forEach((link) => {
    link.onclick = (event) => {
      // The target page number is stored in the "data-i" attribute of the link
      const targetPage = event.target.getAttribute("data-i");
      setPageHash(listingId, targetPage);
      showPage(listingId, targetPage);
      return false;
    };
  });
 }
 /**
  * Loads the deferred images of every visible item of a list: the value of
  * the progressive attribute is copied to "src" when it is non-empty, and the
  * progressive attribute is removed in all cases.
  *
  * @param list - Object exposing `visibleItems`, each with an `elm` element.
  */
 function renderVisibleProgressiveImages(list) {
  const selector = `img[${kProgressiveAttr}]`;
  for (const { elm } of list.visibleItems) {
    // Items without an element are skipped
    if (!elm) {
      continue;
    }
    elm.querySelectorAll(selector).forEach((img) => {
      const deferredSrc = img.getAttribute(kProgressiveAttr);
      if (deferredSrc) {
        img.setAttribute("src", deferredSrc);
      }
      img.removeAttribute(kProgressiveAttr);
    });
  }
 }
 /**
  * Reads the fragment of the current URL and parses it into an object.
  * Fragments are of the form #name=value&name1=value1&name2=value2.
  * An empty fragment is passed to the parser as undefined.
  */
 function getHash() {
  const { hash } = new URL(window.location);
  const fragment = hash ? hash.slice(1) : undefined;
  return parseHash(fragment);
 }
 // Separator between the name=value pairs of a hash
 const kAnd = "&";
 // Separator between a name and its value
 const kEquals = "=";
 /**
  * Parses a hash of the form name=value&name1=value1 into an object that
  * maps each name to its decoded value.
  *
  * Pairs without a name/value separator are ignored. A pair is split on its
  * first separator only, so a value may itself contain the separator.
  *
  * @param hash - Hash text without the leading "#".
  * @returns undefined when `hash` is empty or missing, otherwise the object
  *   of parsed entries (possibly empty).
  */
 function parseHash(hash) {
  if (!hash) {
    return undefined;
  }
  const hashObj = {};
  for (const pair of hash.split(kAnd)) {
    const splitAt = pair.indexOf(kEquals);
    if (splitAt === -1) {
      continue;
    }
    const name = pair.slice(0, splitAt);
    const rawValue = pair.slice(splitAt + kEquals.length);
    hashObj[name] = decodeHashValue(rawValue);
  }
  return hashObj;
 }
 /**
  * Percent-decodes a hash value. A malformed escape sequence (for instance a
  * lone "%" in a hand-edited URL) makes decodeURIComponent throw a URIError;
  * the raw text is returned in that case so that parsing never aborts.
  */
 function decodeHashValue(rawValue) {
  try {
    return decodeURIComponent(rawValue);
  } catch (err) {
    if (!(err instanceof URIError)) {
      throw err;
    }
    return rawValue;
  }
 }
 /**
  * Serializes an object into hash text of the form name=value&name1=value1.
  *
  * Values are percent-encoded so that characters with a meaning in the hash
  * ("&", "=", "%", ...) survive a round trip through parseHash, which
  * percent-decodes every value it reads.
  *
  * @param obj - Object whose own enumerable entries are written to the hash.
  * @returns The hash text, without the leading "#".
  */
 function makeHash(obj) {
  return Object.keys(obj)
    .map((key) => {
      return `${key}${kEquals}${encodeURIComponent(obj[key])}`;
    })
    .join(kAnd);
 }
 /**
  * Serializes the given object and pushes it as the URL fragment through
  * the history API.
  */
 function setHash(obj) {
  const fragment = "#" + makeHash(obj);
  window.history.pushState(null, null, fragment);
 }
 /**
  * Shows the given page of a listing. Does nothing when no list is
  * registered under `listingId`.
  *
  * @param listingId - Key of the list in window["quarto-listings"].
  * @param page - Page number, starting at 1; numeric text is accepted.
  */
 function showPage(listingId, page) {
  const listing = window["quarto-listings"][listingId];
  if (!listing) {
    return;
  }
  const pageSize = listing.page;
  // Index of the first item of the page; items are numbered from 1
  const firstItem = (page - 1) * pageSize + 1;
  listing.show(firstItem, pageSize);
 }
 /**
  * Marks one category element as active and filters every listing to that
  * category.
  *
  * The element is found by comparing the "data-category" attribute of each
  * category element with the requested category, rather than by building a
  * CSS selector from it: a category that contains a quote or a bracket would
  * otherwise yield an invalid selector and make the lookup throw.
  *
  * @param category - Category to activate; "" clears the active category.
  */
 function activateCategory(category) {
  // Deactivate existing categories
  const activeEls = window.document.querySelectorAll(
    ".quarto-listing-category .category.active"
  );
  for (const activeEl of activeEls) {
    activeEl.classList.remove("active");
  }
  // Activate the first element that carries this category
  const wanted = String(category);
  const categoryEls = window.document.querySelectorAll(
    ".quarto-listing-category .category"
  );
  for (const categoryEl of categoryEls) {
    if (categoryEl.getAttribute("data-category") === wanted) {
      categoryEl.classList.add("active");
      break;
    }
  }
  // Filter the listings to this category
  filterListingCategory(category);
 }
 /**
  * Filters every registered listing to the items that belong to a category.
  *
  * An item belongs to a category when that category appears in its
  * comma-separated `categories` value. Items whose `categories` value is
  * null or undefined never match (previously an undefined value made the
  * filter throw).
  *
  * @param category - Category to keep; "" removes the filter.
  */
 function filterListingCategory(category) {
  const listingIds = Object.keys(window["quarto-listings"]);
  for (const listingId of listingIds) {
    const list = window["quarto-listings"][listingId];
    if (!list) {
      continue;
    }
    if (category === "") {
      // resets the filter
      list.filter();
      continue;
    }
    // filter to this category
    list.filter(function (item) {
      const itemCategories = item.values().categories;
      // == null covers both null and undefined
      if (itemCategories == null) {
        return false;
      }
      return itemCategories.split(",").includes(category);
    });
  }
 }
--- a/_freeze/suivi/2025-50/2025-50/execute-results/html.json
+++ b/_freeze/suivi/2025-50/2025-50/execute-results/html.json
--- a/_macros.tex
+++ b/_macros.tex
@ -0,0 +1,11 @@
 \newcommand{\ELBO}[2]{\mathcal{J}(#1,#2)}
 \newcommand{\R}{\mathcal{R}}
 \newcommand{\ELBORTheta}{\ELBO{\R}{\pmb{\theta}}}
 \newcommand{\Var}{\mathbb{V}}
 \newcommand{\Esp}{\mathbb{E}}
 \newcommand{\Prob}{\mathbb{P}}
 \newcommand{\calL}{\mathcal{L}}
 \newcommand{\Normal}{\mathcal{N}}
 \DeclareMathOperator{\ilr}{ilr}
 \DeclareMathOperator{\clr}{clr}
 \DeclareMathOperator{\Cat}{Cat}
--- a/_quarto.yml
+++ b/_quarto.yml
@ -2,24 +2,36 @@ project:
  type: website
  output-dir: public
 toc: true
 number-sections: true
 website:
  title: "Suivi de la thèse"
  navbar:
    left:
-      - href: index.qmd
+      - icon: journals
        href: index.qmd
        text: "Liste des semaines"
    right: 
      - icon: git
        href: https://git.polarolouis.fr/polarolouis/these-recap-hebdo
        aria-label: Dépôt Git du journal
 lang: fr
 date: last-modified
 date-modified: last-modified
 author:
-  name: "Louis LACOSTE"
+  name: Louis Lacoste
-  email: "louis.lacoste@agroparistech.fr"
+  email: louis.lacoste@agroparistech.fr
  affiliation: MIA Paris-Saclay, INRAE, AgroParisTech, Université Paris-Saclay
  orcid: 0009-0004-0178-9821
  github: Polarolouis
 format: 
    html:
      theme: yeti
      toc: true
      html-math-method: katex
-      embed-resources: true
+      embed-resources: false
--- a/index.qmd
+++ b/index.qmd
@ -1,8 +1,28 @@
 ---
 title: "Journal suivi de la thèse"
 listing:
    - id: journal-these
      contents: suivi
      type: default
      sort: "date desc"
      categories: true
      page-size: 5
    - id: knowledge-base
      contents: knowledge_base
      type: default
      sort: "date desc"
      categories: true
 ---
 ::: {.callout-note icon="false" collapse="true"}
 ## Agenda
 <iframe src="https://calendar.google.com/calendar/embed?height=400&wkst=2&ctz=Europe%2FParis&showPrint=0&mode=AGENDA&src=NTc4ZDI5ZGIwZmFiMGZjZjk1ZWM2NjQ4OWFjYTFmYzkxNzAyMGU2ODk1YjRmMTQ1NjA1YTRlMWU0MzU3N2FkOUBncm91cC5jYWxlbmRhci5nb29nbGUuY29t&color=%234285f4" style="border:solid 1px #777" width="100%" height="400" frameborder="0" scrolling="no"></iframe>
 :::
 ## Base de connaissances et trucs en vrac
 :::{#knowledge-base}
 ::: 
 ## Journaux
 :::{#journal-these}
 :::
--- a/knowledge_base/_metadata.yml
+++ b/knowledge_base/_metadata.yml
@ -0,0 +1,3 @@
 categories: []
 date: last-modified
 date-modified: last-modified
--- a/knowledge_base/colsbm_application_reseaux_et_agri.qmd
+++ b/knowledge_base/colsbm_application_reseaux_et_agri.qmd
@ -0,0 +1,34 @@
 ---
 title: "Pour application du modèle colBiSBM sur données interaction PP et pratiques agricoles"
 categories: [application, agricole, graphe, collection, lbm, sbm]
 ---
 {{< include /_macros.tex >}}
 # Idée de l'application
 En discutant avec Alizée, et grâce aux ressources de la [section "Liens" données par Jean](#liens), il semble possible d'étudier l'impact des pratiques agricoles autour des espaces de pollinisation sur la structure des réseaux plantes-pollinisateurs.
 # Point à éclaircir
 1. Quels réseaux plantes-pollinisateurs choisir, où les trouver ? Besoin de réseaux en France pour la facilité.
 2. Faut-il utiliser les covariables seulement de manière *post-hoc* pour corréler avec le *clustering* de réseaux obtenus ?
 3. Comment encoder les covariables ?
    - Est-ce que je les mets sous forme de pourcentage dans un *buffer* (quel rayon ?) comme Jean ? Alors problèmes inhérents aux données compositionnelles mais facilité d'exécution ?
    - Quelle distance considérer pour l'impact des pratiques agricoles, sachant que cette distance varie selon les pollinisateurs ? Besoin de connaissances expertes.
    - Besoin d'homogénéiser les échelles ? Ou a minima d'en choisir une ou plusieurs à considérer pour les covariables ?
    - Gestion de gros tableaux de données pas simple.
 4. **Le temps ???**
 # Liens
 CORINE Land Cover et extraction en R
 Très gros grain :
 <https://fr.wikipedia.org/wiki/Corine_Land_Cover> et le package de Jean pour l'extraction des *buffers* de types d'utilisation des sols :
 <https://github.com/jean-cohen/corine.land.cover.landuse.extraction>
 Les cartes de données :
 - Carte du Bio et des types de cultures échelle parcelle : <https://www.agencebio.org/cartobio/>
 - Échelle code postal, achat de phytosanitaires : <https://ventes-produits-phytopharmaceutiques.eaufrance.fr/>
 - Thèse de Milena Cairo, classification des parcelles selon les pratiques en pesticides : <https://theses.hal.science/tel-05038286>
 - Recensement des parcelles et du type de culture : <https://cartes.gouv.fr/rechercher-une-donnee/dataset/IGNF_RPG?redirected_from=geoservices.ign.fr>
--- a/knowledge_base/figs/projets-phylo/dag-simple.pdf
+++ b/knowledge_base/figs/projets-phylo/dag-simple.pdf
--- a/knowledge_base/figs/projets-phylo/dag-simple.tex
+++ b/knowledge_base/figs/projets-phylo/dag-simple.tex
@ -0,0 +1,54 @@
 \documentclass{standalone}
 \usepackage{tikz}
 \usetikzlibrary{positioning,shapes.arrows, arrows.meta,shapes.geometric}
 \begin{document}
 \begin{tikzpicture}
    \tikzset{
    every path/.append style = {
            arrows = ->,
            > = stealth,
        },
    every node/.append style = {
            shape = circle,
            draw = black,
            minimum size=3em
        },
    latent/.style = {
            fill = lightgray
        },
    prior/.style = {
            fill = red
        },
    moral/.style = {
    dashed,
    > = {},       % remove arrow tip
    arrows = -,   % ensure no arrows
    }
    }
    \node (y) {$Y$};
    \node[latent] (z) [above left = of y] {$Z$};
    \node[latent] (w) [above right = of y] {$W$};
    \node[latent] (P) [above = of z] {$P$};
    \node[prior] (sigma2) [above = of P] {$\sigma^2$};
    \node[prior] (rho) [above = of w] {$\rho_{1:R}$};
    \node[prior] (alpha) [below = of y] {$\pmb{\alpha}$};
    \path (z) edge (y);
    \path (w) edge (y);
    \path (rho) edge (w);
    \path (alpha) edge (y);
    \path (P) edge (z);
    \path (sigma2) edge (P);
    % moral
    \path[moral] (z) edge (alpha);
    \path[moral] (w) edge (alpha);
    \path[moral] (z) edge (w);
 \end{tikzpicture}
 \end{document}
--- a/knowledge_base/projets-phylo.qmd
+++ b/knowledge_base/projets-phylo.qmd
@ -0,0 +1,178 @@
 ---
 title: "Idées autour de l'inclusion de la phylogénie"
 categories: [phylogénie, graphes, lbm, sbm]
 ---
 {{< include /_macros.tex >}}
 # Contexte de l'inclusion de la phylogénie dans l'estimation de la structure des interactions
 Dans le 3e axe de ma thèse nous souhaitons inclure de l'information phylogénétique dans l'estimation de la structure des réseaux d'interaction microbiens.
 1. Ces réseaux se présentent sous la forme de matrices de comptages hautement rectangulaires, c'est-à-dire avec un grand nombre de microorganismes et, en comparaison, peu d'échantillons (de sols, d'aliments, de patients...). Cette haute dimensionnalité met en échec les méthodes classiques non conçues pour gérer autant de noeuds (SBM). Il s'agit donc d'un **premier enjeu**.
 2. Les données de comptages de ces matrices sont compositionnelles : la profondeur de séquençage (le nombre de séquences lues) étant finie, cela implique une dépendance entre les comptages observés. Si une séquence est surexprimée par rapport aux autres, alors que l'abondance réelle des autres n'a pas changé, les comptages observés des autres séquences vont diminuer. Voir [la note sur les données compositionnelles](#note-donnees-compo).
 ::: {#note-donnees-compo .callout-note title="Données compositionnelles"}
 Soit $N$ la profondeur de séquençage, $S$ le nombre de séquences distinctes et, $\forall s \in \{1,\dots,S\}$, $n_s$ le nombre réel de fois où la séquence $s$ est présente, avec $t = \sum_s n_s$ le nombre total de séquences. Les comptages observés $o_s$ pour la séquence $s$ sont $o_s = \dfrac{n_s}{N}$, et on a $\sum_{s} o_s = \dfrac{1}{N} \sum_{s} n_s = \dfrac{t}{N}$ par construction.
 Et donc, pour un $s'$ quelconque, on a $o_{s'} = \dfrac{t}{N} - \sum_{s, s\neq s'} o_s$, d'où une contrainte sur les $o_s$.
 :::
 Divers autres enjeux se posent quand on considère ce type de données. Par exemple, l'arbre phylogénétique peut ne pas être directement accessible, ou bien être dominé par un certain clade. Il peut aussi exploser en nombre d'individus à chaque niveau (à relier au point 1).
 ## Formalisme commun
 Dans la suite, nous considèrerons $\mathcal{T}$ l'arbre ayant $L$ niveaux, indexés de $l = 0,\dots,L$ avec $0$ la racine commune et $L$ les feuilles de l'arbre.
 $Y$ la matrice de bi-adjacence encodant le graphe et modélisant les interactions, de taille $n_1\times n_2$.
 $V, X$ les matrices de covariables sur les noeuds en ligne et en colonne de $Y$. $V$ est de taille $n_1 \times d$ et $X$ est de taille $n_2 \times p$.
 # SBM (ou LBM) Séquentiel
 ## Formalisation de l'idée
 Ici on utilise l'arbre phylogénétique afin d'initialiser l'EM variationnel du niveau suivant.
 Concrètement, on ajuste un LBM au niveau $l$, sur la matrice de comptages agrégée à ce niveau $Y^l$, ce qui donne des probabilités variationnelles $\pmb{\tau}^{1,l},\pmb{\tau}^{2,l}$ qui sont de tailles respectives $n_{1,l} \times Q_{l}$ et $n_{2,l} \times R_{l}$.
 Puis, pour tout individu $i$ du niveau $l$ et tout enfant $u\in \text{Child}(i)$, on initialise ses probas $\widetilde{\tau}^{1,l+1}_u = \tau^{1,l}_i + \varepsilon_{u}$, avec $\varepsilon_u \sim \mathcal{N}_{Q_l}(0,\sigma^2 I_{Q_l})$ et on renormalise $\tau^{1,l+1}_{u} = \dfrac{\widetilde{\tau}^{1,l+1}_u}{\sum_q \widetilde{\tau}^{1,l+1}_{u,q}}$. On ajoute une perturbation afin de ne pas rester bloqué sur le point fixe précédent et de pouvoir donc obtenir les $\tau^{1,l+1}$ à l'issue de l'optimisation.
 ## Limites de l'approche
 Le passage d'information selon l'arbre nous semble intuitivement être une bonne approche, et les résultats que nous avons obtenus indiquent qu'un peu d'information semble passer, mais il faut aller profondément dans l'arbre et on rencontre alors le problème du coût computationnel.
 En effet cette méthode ne diminue pas le coût en calcul puisqu'elle calcule un LBM à chacun des $L$ niveaux, au mieux elle donne un point d'initialisation intelligent mais cela semble difficilement applicable à des données réelles.
 # SBM et LBM avec covariables sur les noeuds
 Ce modèle vise à intégrer des covariables de noeuds comme modificateurs des probabilités *a priori* d'appartenance aux groupes.
 Pour la phylogénie, en passant par une MDS ou une autre méthode permettant, à partir des distances phylogénétiques, d'obtenir des "positions" ou des covariables, cela permettrait d'injecter l'a priori phylogénétique dans l'estimation de la structure du réseau. 
 ## Formalisation du modèle
 Toujours modèle LBM mais avec probas d'appartenance pour les colonnes variables:
 \begin{align*}
 Z_i &\sim \mathcal{M}(1; \pi_1, \dots, \pi_Q), \sum_{q=1}^{Q} \pi_q = 1\\
 W_j &\sim \mathcal{M}(1; \rho_1^j, \dots, \rho_R^j), \sum_{r=1}^{R} \rho_r^j = 1\\
 Y_{i,j}&\mid Z_i = q, W_j = r \sim \mathcal{F}(\alpha_{qr})
 \end{align*}
 Voici pour les probas pour les individus en colonne de la matrice d'adjacence :
 \begin{align*}
 \pmb{\beta}_{r}& = \begin{pmatrix}
    \beta_{r,0}\\
    \vdots\\
    \beta_{r,p}
 \end{pmatrix}, & X_{j,\bullet} = \begin{pmatrix}
    1 = x_{0,j} & x_{1,j} & \dots & x_{p,j}
 \end{pmatrix}\\
 X_{j,\bullet} \pmb{\beta}_r& = \beta_{r,0} x_{0,j} + \beta_{r,1} x_{1,j} + \dots + \beta_{r,p} x_{p,j} & \approx \log(\rho_r^j) \\
 B & = \begin{pmatrix}
 \pmb{\beta}_1 \dots \pmb{\beta}_r \dots \pmb{\beta}_R
 \end{pmatrix} & X_{j,\bullet}B \approx \log(\pmb{\rho}^j) \\
 X B & \approx \log((\pmb{\rho}^j)_{j=1,\dots,n_2}) = \log(\pmb{\Rho})\\
 \end{align*}
 avec les $\beta, B$ qui désignent donc les coefficients de la combinaison linéaire et $X$ les covariables des individus (taille $n_2\times p$, $p$ covariables).
 Et pour les probas en lignes du LBM :
 \begin{align*}
 \pmb{\gamma}_{q}& = \begin{pmatrix}
    \gamma_{q,0}\\
    \vdots\\
    \gamma_{q,d}
 \end{pmatrix}, & V_{i,\bullet} = \begin{pmatrix}
    1 = v_{0,i} & v_{1,i} & \dots & v_{d,i}
 \end{pmatrix}\\
 V_{i,\bullet} \pmb{\gamma}_q & = \gamma_{q,0} v_{0,i} + \gamma_{q,1} v_{1,i} + \dots + \gamma_{q,d} v_{d,i} & \approx \log(\pi_q^i) \\
 \Gamma & = \begin{pmatrix}
 \pmb{\gamma}_1 \dots \pmb{\gamma}_q \dots \pmb{\gamma}_Q
 \end{pmatrix} & V_{i,\bullet} \Gamma \approx \log(\pmb{\pi}^i) \\
 V \Gamma & \approx \log((\pmb{\pi}^i)_{i=1,\dots,n_1}) = \log(\pmb{\Pi})
 \end{align*}
 avec les $\gamma, \Gamma$ qui désignent donc les coefficients de la combinaison linéaire et $V$ les covariables des individus (taille $n_1\times d$, $d$ covariables).
 ## Preuve de l'identifiabilité
 Soient $B,B^{\prime}$ avec $B_{\bullet,R} = B^{\prime}_{\bullet,R} = \vec{0}_{p+1}$ et $X$ de rang plein tel que $X^{\top}X$ soit inversible.
 \begin{align*}
 &\sigma(XB) = \sigma(XB^{\prime})\\
 &\implies \exists C = \begin{pmatrix}c_1 \\ \vdots \\ c_j \\ \vdots \\ c_{n_2}\end{pmatrix} \in \mathbb{R}^{n_2}, X B = X B^{\prime} + C \pmb{1}_{R}^{\top} \\
 &\implies \exists C \in \mathbb{R}^{n_2}, (X B)_{j,r} = (X B^{\prime})_{j,r} + (C \pmb{1}_{R}^{\top})_{j,r} \\
 &\implies \exists C \in \mathbb{R}^{n_2}, \forall r\in\{1\dots,R\}, \forall j\in\{1,\dots,n_2\}, \sum_{k=1}^{p+1} x_{j,k} \beta_{k,r} = \sum_{k=1}^{p+1} x_{j,k} \beta^{\prime}_{k,r} + c_j\\
 &\implies \exists C \in \mathbb{R}^{n_2}, \forall j\in\{1,\dots,n_2\}, \sum_{k=1}^{p+1} x_{j,k} \beta_{k,R} = \sum_{k=1}^{p+1} x_{j,k} \beta^{\prime}_{k,R} + c_j \\
 &\implies \exists C \in \mathbb{R}^{n_2}, \forall j\in\{1,\dots,n_2\}, \sum_{k=1}^{p+1} x_{j,k} \times 0 = \sum_{k=1}^{p+1} x_{j,k} \times 0 + c_j \\
 &\implies \exists C \in \mathbb{R}^{n_2}, \forall j\in\{1,\dots,n_2\}, 0 = 0 + c_j \implies c_j = 0 \\
 &\implies C = \begin{pmatrix} 0 \\ \vdots \\ 0 \end{pmatrix} \text{ et donc } XB = XB^{\prime} \\
 & \implies (X^{\top} X)^{-1}X^{\top} X B = (X^{\top} X)^{-1}X^{\top} X B^{\prime} \implies B=B^{\prime}
 \end{align*}
 ## Inférence
 Inférence variationnelle donc $\ell(Y;\pmb{\theta}) \geq \mathcal{J}(\mathcal{R},\pmb{\theta})$ avec
 $$
 \ELBORTheta = \sum_{i = 1}^{n_1}\sum_{j=1}^{n_2}\sum_{q \in \mathcal{Q}_1} \sum_{r \in \mathcal{Q}_2} \tau_{iq}^{1} \tau_{jr}^{2} \log f(Y_{ij}; \alpha_{qr})
    + \sum_{i=1}^{n_1} \sum_{q \in \mathcal{Q}_1} \tau_{iq}^{1} \log \pi_{\color{black}q} + \sum_{j=1}^{n_2} \sum_{r \in \mathcal{Q}_2} \tau_{jr}^{2} \log \rho_{\color{black}r}       \\
    - \sum_{i=1}^{n_1} \sum_{q \in \mathcal{Q}_1} \tau_{iq}^{1} \log \tau_{iq}^{1} - \sum_{j=1}^{n_2} \sum_{r \in \mathcal{Q}_2} \tau_{jr}^{2} \log \tau_{jr}^{2}
 $$
 Avec $\rho_r^j = \frac{\exp{\beta_r X_j}}{\sum_{s=1}^{R} \exp{\beta_s X_j}} = \sigma(\pmb{\beta} \pmb{X})_{r,j}$, où $\sigma$ désigne le softmax. Et sous la contrainte d'[identifiabilité](#preuve-de-lidentifiabilité) que l'un des $(\beta_r)_{r=1,\dots,R}$ soit nul, ici $\beta_R = 0$.
 La partie pertinente de l'ELBO devient:
 $$
  P((\beta_r)_{r=1,\dots,R}, (X_j)_{j=1,\dots,n_2}, (\tau_{jr})_{\substack{j=1,\dots,n_2\\r=1,\dots,R}} ) =  \sum_{j=1}^{n_2} \sum_{r=1}^{R} [\tau_{jr} (\beta_r X_j - \log (\sum_{s=1}^{R} \exp{\beta_s X_j}))]
 $${#eq-modele-covar-prop}
 Et on obtient la dérivée partielle par rapport à $\beta_t$ comme:
 \begin{align*}
 \dfrac{\partial P}{\partial \beta_t}&((\beta_r)_{r=1,\dots,R}, (X_j)_{j=1,\dots,n_2}, (\tau_{jr})_{\substack{j=1,\dots,n_2\\r=1,\dots,R}} ) = \sum_{j=1}^{n_2} \biggl[ \tau_{jt} X_j - \frac{X_j \exp{\beta_t X_j}}{\sum_{s=1}^{R} \exp{\beta_s X_j}} \biggr]\\
 & = \sum_{j=1}^{n_2} \biggl[\bigl(\tau_{jt} - \sigma(\pmb{\beta} \pmb{X})_{t,j}\bigr) X_j\biggr] = \sum_{j=1}^{n_2} \biggl[\bigl(\tau_{jt} - \rho_t^j \bigr) X_j\biggr] 
 \end{align*}
 ## Implémentation
 J'ai implémenté tout ça dans un *fork* de [blockmodels](https://github.com/GrossSBM/blockmodels). Ce fork est disponible [ici](https://github.com/Polarolouis/blockmodels) et **en cours de relecture par JBL**.
 Pour les détails techniques, j'ai ré-écrit la gestion des *memberships* en R pour passer les covariables et coefficients nécessaires aux calculs. J'ai implémenté une descente de gradient en utilisant un algorithme de type BFGS pour l'optimisation des coefficients de la combinaison linéaire. Et enfin j'ai intégré plusieurs choses dans le package R [sbm](https://github.com/GrossSBM/sbm):
 1. [La gestion des covariables de noeuds](https://github.com/GrossSBM/sbm/tree/nodescovariates)
 2. Le support des [valeurs manquantes](https://github.com/GrossSBM/sbm/tree/feat/NAsupport)
 ## La suite
 Maintenant, Sophie et Pierre gèrent la rédaction de vignettes et de simulations autour de ces fonctionnalités.
 Nous attendons de voir si l'on trouve un jeu de données adapté à cette méthode.
 **Limites** : Ce modèle ne permet pas le passage à l'échelle pour les gros réseaux que représentent les matrices de comptage.
 # LBM avec dépendance latente entre les probabilités *a priori*
 ## Formalisation du modèle
 Pierre a proposé que l'on pose une structure latente sur les $\pmb{Z}$. C'est-à-dire
 \begin{align*}
     & P \sim \Normal_{n_1, K-1} (O_{n_1, K-1}, \Sigma, \sigma^2 Id_{K-1}), \\
     \forall i \in \{1,\dots,n_1\}, & Z_i \mid P_i  \overset{ind}{\sim} \Cat_{K} ({\ilr}^{-1}(P_i) = \pi_{1:K}^{(i)}), \\
     \forall j \in \{1,\dots,n_2\}, & W_j \overset{iid}{\sim} \Cat_R (\rho_{1:R}),\\
     \forall (i,j) \in \{1,\dots,n_1\}\times\{1,\dots,n_2\}, & Y_{ij} \mid Z_i = k, W_j = r \overset{ind}{\sim} \mathcal{F}(\alpha_{kr}),
 \end{align*}
 avec $\Sigma$, la matrice de variance-covariance déterminée en fonction de l'apparentement (phylogénétique) des noeuds.
 ![Le DAG simplifié du modèle](figs/projets-phylo/dag-simple.pdf)
 # Échantillonnage selon l'arbre
 Afin d'affronter le coût computationnel que représente l'ajustement 
 # *Latent Position Model* (LPM) avec phylogénie des représentations latentes selon la phylogénie 
 ## Classique 
 ## *Deep* LPM
 Possibilité d'utiliser un encodeur qui soit un réseau de neurones et de bénéficier de tous les décodeurs de la littérature LPM.
 **Quel est le lien avec le VGAE ?**
 **Avantage de passage à l'échelle??**
--- a/knowledge_base/vae_wasserstein_gromov.qmd
+++ b/knowledge_base/vae_wasserstein_gromov.qmd
@ -0,0 +1,39 @@
 ---
 title: "Variational Graph AutoEncoder with Wasserstein"
 categories: [convolution, machine learning, vae, graphes]
 ---
 {{< include /_macros.tex >}}
 Suite à la discussion avec Julian j'inscris ce que l'on s'est dit.
 # Idée principale
 Les VAE avec convolution de graphes (GCN) permettent d'apprendre une représentation latente des noeuds d'un graphe basée sur les interactions entre noeuds.
 **Objectif** : apprendre un même encodeur et donc un espace latent structuré pour clusteriser une collection de réseaux sur la base de la structure.
 *Sous-objectif* : pouvoir prendre en compte des covariables (Fused Wasserstein ?).
 Principe du VAE:
 Soit $Y$ une matrice d'adjacence (ou de bi-adjacence pour les graphes bipartites), $X$ une matrice de covariables.
 Soit $D_1$ la matrice des degrés en ligne, $D_2$ la matrice des degrés en colonne.
 $\widetilde{Y} = D_1^{-1/2} Y D_2^{-1/2}$ 
 **à compléter**
 # Apprentissage contrastif
 Puisque l'on voudrait marquer la séparation entre différentes structures de réseaux, on pourrait vouloir faire de l'[apprentissage contrastif pour V(G)AE](https://u9534056.medium.com/an-overview-of-contrastive-learning-fa520f5f2c23).
 ## Hypersphère méga cool
 Il faut creuser : forcer les contraintes des *embeddings* à vivre sur la surface d'une hypersphère car, d'après Julian et la littérature, par rapport à un espace euclidien cela permet d'avoir :
 - position latente bornée : stabilisation de l'apprentissage et évite l'explosion dans une ou plusieurs directions.
 - couverture "uniforme" de la sphère : tendance à faciliter l'apprentissage contrastif, avec l'idée de bien séparer les graphes aux structures différentes.
 [Première source](https://www.envisioning.com/vocab/hyperspherical-representation-learning)
 Le softmax est remplacé par la loi de von Mises-Fisher. D'après [Wikipédia](https://fr.wikipedia.org/wiki/Loi_de_von_Mises-Fisher#Relation_avec_la_loi_normale), il s'agit de l'équivalent de la loi normale multivariée à covariance isotrope, restreinte à l'hypersphère unité.
--- a/suivi/2025-17/2025-17.qmd
+++ b/suivi/2025-17/2025-17.qmd
@ -2,9 +2,8 @@
 title: "Bilan semaine 17 2025 : 24 avril - 25 avril"
 categories: 
  - colBiSBM
-format: 
+
-    html:
+date: 25 04 2025
        embed-resources: true
 ---
 ## A faire
--- a/suivi/2025-18/2025-18.qmd
+++ b/suivi/2025-18/2025-18.qmd
@ -0,0 +1,95 @@
 ---
 title: "Bilan semaine 18 2025 : 28 avril - 2 mai"
 categories: [colBiSBM, inférence]
 date: 2025 05 02
 ---
 ## A faire
 ### Stratégie suite : Inférence
 - Lire biblio fournie Julie, Inférence de réseaux : co-occurence
 - Papier pour comprendre données 
 - pbs : variance, bcp de zero, covariables, offset et taxonomie (Reseaux arretes differents niveaux : Genre, OTU ...)
 > Combine networks at different taxonomic levels
 - Inférence + GREMLINS
 ### Rédaction article
 - Relire intro St Clair
 - S'inspirer structure pour mon intro
 - Trouver biblio intro
 - Rédiger l'intro
 - Regarder les applications pour les collections de réseaux recommender system
 - Lire les papiers de Baldock Traveset Souza Cordeniz Trojelsgaard et Gibson
 - Dire résultats nettement meilleurs et variabilités inférieures.
 - Intégrer les retours de Sophie
 ### Simulations article
 - Comparer sur clustering unipartite avec des versions symétrisées par blocs des matrices d'adjacence.
 - Corriger structure de simus :
    - Pour noisy $\alpha$ : 
        - Logit pour envoyer la gaussienne vers (0,1)
        - Beta contrainte dans (0,1)
    - Pour noisy links : Générer `nb_clustering` collections de taille M puis prélever $\epsilon_{max}n_r n_c$ liens à inverser puis pour les $\epsilon < \epsilon_{max}$ prélever dans la liste des indices afin d'avoir des perturbations emboitées. 
 ### Applications
 - Kmeans sur la densité des réseaux subdoré pour pré-partitionner et *clusteriser*.
 Car densités déséquilibrées.
 ![](figs/density-subdore.png)
 ### Autour de l'article et du package
 - Créer des vignettes illustrant par exemple des cas de simulations. **Possible de mettre l'exemple d'application de Sophie sur les réseaux avec gradient d'urbanisation**.
 ## J'ai fait
 ### JdS 
 - colDEM CSE
 ### Clustering exhaustif Baldock
 - Le clustering de toutes les 52 partitions s'est fait en 5h30 ! (Mémoïsation)
 - Pour iid la meilleure partition avec $BICL=-9466.911$ contre $BICL_{algo} = -9466.873 \pm 0.02205$ trouvé avec l'algo
 ![best_iid](figs/partition-iid.svg)
 - Pour $\pi\rho$ la meilleure partition avec $BICL = -9497.92$ contre $BICL_{algo} =-9497.92 \pm 0.00009$
 ![best_pirho](figs/partition-pirho.svg)
 ## A continuer
 - Résultats simus NA **Erreur pour certaines conditions** : Pour NA robustness générer `nb_rep` collections de taille $M=2$ et prélever 
 $\epsilon_{max}n_r n_c$ liens à retirer puis pour les $\epsilon < \epsilon_{max}$ prélever dans la liste des indices afin d'avoir des perturbations emboitées.
 Il faut que j'ajoute un mécanisme pour reprendre les conditions qui ont planté et que je saute dans le future_lapply les conditions déjà traitées (pour avoir la même seed quand je vais exécuter le code). 
 Implémenté les missing steps *en attente des résultats MIGALE*. 
 - Lire Biological Networks -  François Képès
 - Relancer simus d'inférence avec n = 240 pour voir si la qualité augmente (se rassurer). En fait on est déjà à 240, j'ai relancé avec M = 4 au lieu de M = 2.
 En attente résultats MIGALE
 ### Applications
 - Idée Sophie: Regarder clustering de données plantes-pollinisateur selon gradient d'urbanisation
 > Sophie a fait une appli qui marche bien et va dans le sens de l'analyse faite
 (à savoir pas d'effet du gradient d'urbanisation). À continuer pour l'intégrer dans l'article !
 ### Simulations article
 - Relancer simulations de clustering avec $M = 30$ où $M_i = 10, \forall i$. En attente retour MIGALE
 Relancer simus clustering avec VEM steps = 10 000 et plus nombreux init pour spectral. Ajouter simu clustering métriques nb sous-collections obtenues. 
 Vérifier les résultats obtenus si ARI = 0. Et augmenter la taille $M = 30$ avec $M_1 = M_2 = M_3 = 10$.
--- a/suivi/2025-18/figs/density-subdore.png
+++ b/suivi/2025-18/figs/density-subdore.png
--- a/suivi/2025-18/figs/partition-iid.svg
+++ b/suivi/2025-18/figs/partition-iid.svg
@ -0,0 +1,403 @@
 <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" class="httpgd" width="1367.00" height="979.00" viewBox="0 0 1093.60 783.20">
 <defs>
  <style type='text/css'><![CDATA[
    .httpgd line, .httpgd polyline, .httpgd polygon, .httpgd path, .httpgd rect, .httpgd circle {
      fill: none;
      stroke: #000000;
      stroke-linecap: round;
      stroke-linejoin: round;
      stroke-miterlimit: 10.00;
    }
  ]]></style>
 <clipPath id="c0"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
 <clipPath id="c1"><rect x="46.58" y="29.02" width="941.56" height="618.33"/></clipPath>
 <clipPath id="c2"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
 <clipPath id="c3"><rect x="46.58" y="29.02" width="941.56" height="222.24"/></clipPath>
 <clipPath id="c4"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
 <clipPath id="c5"><rect x="46.58" y="29.02" width="941.56" height="222.24"/></clipPath>
 <clipPath id="c6"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
 <clipPath id="c7"><rect x="46.58" y="29.02" width="941.56" height="222.24"/></clipPath>
 <clipPath id="c8"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
 <clipPath id="c9"><rect x="46.58" y="29.02" width="941.56" height="222.24"/></clipPath>
 <clipPath id="c10"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
 <clipPath id="c11"><rect x="307.96" y="134.16" width="780.16" height="285.47"/></clipPath>
 <clipPath id="c12"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
 <clipPath id="c13"><rect x="307.96" y="23.54" width="780.16" height="110.62"/></clipPath>
 <clipPath id="c14"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
 <clipPath id="c15"><rect x="5.48" y="134.16" width="302.48" height="285.47"/></clipPath>
 <clipPath id="c16"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
 <clipPath id="c17"><rect x="677.42" y="141.63" width="109.63" height="109.63"/></clipPath>
 <clipPath id="c18"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
 <clipPath id="c19"><rect x="659.15" y="29.02" width="146.17" height="73.09"/></clipPath>
 <clipPath id="c20"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
 <clipPath id="c21"><rect x="64.90" y="141.63" width="219.26" height="109.63"/></clipPath>
 <clipPath id="c22"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
 <clipPath id="c23"><rect x="46.58" y="425.11" width="941.56" height="222.24"/></clipPath>
 <clipPath id="c24"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
 <clipPath id="c25"><rect x="46.58" y="425.11" width="941.56" height="222.24"/></clipPath>
 <clipPath id="c26"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
 <clipPath id="c27"><rect x="46.58" y="425.11" width="941.56" height="222.24"/></clipPath>
 <clipPath id="c28"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
 <clipPath id="c29"><rect x="46.58" y="425.11" width="941.56" height="222.24"/></clipPath>
 <clipPath id="c30"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
 <clipPath id="c31"><rect x="320.63" y="530.25" width="767.49" height="247.47"/></clipPath>
 <clipPath id="c32"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
 <clipPath id="c33"><rect x="320.63" y="419.63" width="767.49" height="110.62"/></clipPath>
 <clipPath id="c34"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
 <clipPath id="c35"><rect x="5.48" y="530.25" width="315.15" height="247.47"/></clipPath>
 <clipPath id="c36"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
 <clipPath id="c37"><rect x="628.21" y="537.72" width="182.72" height="109.63"/></clipPath>
 <clipPath id="c38"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
 <clipPath id="c39"><rect x="536.85" y="425.11" width="365.44" height="73.09"/></clipPath>
 <clipPath id="c40"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
 <clipPath id="c41"><rect x="46.58" y="547.78" width="268.57" height="89.52"/></clipPath>
 <clipPath id="c42"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
 </defs>
 <rect width="100%" height="100%" style="stroke: none;fill: #FFFFFF;"/>
 <g clip-path="url(#c0)">
 <rect x="0.00" y="0.00" width="1093.60" height="783.20" style="stroke-width: 1.07;stroke: #FFFFFF;fill: #FFFFFF;"/>
 </g><g clip-path="url(#c11)">
 <rect x="307.96" y="134.16" width="780.16" height="285.47" style="stroke-width: 2.13;stroke: #FFFFFF;fill: #FFFFFF;"/>
 </g><g clip-path="url(#c13)">
 <rect x="307.96" y="23.54" width="780.16" height="110.62" style="stroke-width: 1.07;stroke: #FFFFFF;fill: #FFFFFF;"/>
 </g><g clip-path="url(#c15)">
 <rect x="5.48" y="134.16" width="302.48" height="285.47" style="stroke-width: 1.07;stroke: #FFFFFF;fill: #FFFFFF;"/>
 </g><g clip-path="url(#c17)">
 <rect x="677.42" y="141.63" width="109.63" height="109.63" style="stroke-width: 2.13;stroke: none;fill: #FFFFFF;"/>
 <polyline points="677.42,141.63 787.05,141.63" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="677.42,196.45 787.05,196.45" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="677.42,251.26 787.05,251.26" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="677.42,251.26 677.42,141.63" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="732.24,251.26 732.24,141.63" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="787.05,251.26 787.05,141.63" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="677.42,169.04 787.05,169.04" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="677.42,223.86 787.05,223.86" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="704.83,251.26 704.83,141.63" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="759.65,251.26 759.65,141.63" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <rect x="677.42" y="141.63" width="54.82" height="54.82" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FF9E81;"/>
 <rect x="732.24" y="141.63" width="54.82" height="54.82" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFCBB9;"/>
 <rect x="677.42" y="196.45" width="54.82" height="54.82" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFCEBD;"/>
 <rect x="732.24" y="196.45" width="54.82" height="54.82" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFFAF9;"/>
 <line x1="677.42" y1="196.45" x2="787.05" y2="196.45" style="stroke-width: 1.07;stroke-linecap: butt;"/>
 <line x1="677.42" y1="251.26" x2="787.05" y2="251.26" style="stroke-width: 1.07;stroke-linecap: butt;"/>
 <line x1="732.24" y1="251.26" x2="732.24" y2="141.63" style="stroke-width: 1.07;stroke-linecap: butt;"/>
 <line x1="787.05" y1="251.26" x2="787.05" y2="141.63" style="stroke-width: 1.07;stroke-linecap: butt;"/>
 <g><text x="704.83" y="172.98" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="15.57px" lengthAdjust="spacingAndGlyphs">0.5</text></g>
 <g><text x="759.65" y="172.98" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.27</text></g>
 <g><text x="704.83" y="227.80" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.26</text></g>
 <g><text x="759.65" y="227.80" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.02</text></g>
 <rect x="677.42" y="141.63" width="109.63" height="109.63" style="stroke-width: 2.13;stroke: #333333;"/>
 </g><g clip-path="url(#c18)">
 <g><text x="670.70" y="173.33" text-anchor="end" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">1</text></g>
 <g><text x="670.70" y="228.14" text-anchor="end" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">2</text></g>
 <polyline points="673.69,169.04 677.42,169.04" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="673.69,223.86 677.42,223.86" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="704.83,255.00 704.83,251.26" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="759.65,255.00 759.65,251.26" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
 <g><text x="704.83" y="266.56" text-anchor="middle" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">1</text></g>
 <g><text x="759.65" y="266.56" text-anchor="middle" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">2</text></g>
 </g><g clip-path="url(#c19)">
 <rect x="659.15" y="29.02" width="146.17" height="73.09" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
 <rect x="665.79" y="38.16" width="6.04" height="54.82" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FC8D62;"/>
 <rect x="671.84" y="38.16" width="126.85" height="54.82" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #66C2A5;"/>
 <g><text x="735.26" y="69.51" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.95</text></g>
 </g><g clip-path="url(#c20)">
 <polyline points="659.15,102.11 659.15,29.02" style="stroke-width: 1.07;stroke-linecap: butt;"/>
 <g><text x="654.22" y="68.71" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="137.92px" lengthAdjust="spacingAndGlyphs">Baldock2011_TB+Baldock2011_JN</text></g>
 <polyline points="656.41,65.57 659.15,65.57" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="659.15,102.11 805.32,102.11" style="stroke-width: 1.07;stroke-linecap: butt;"/>
 <polyline points="798.68,104.85 798.68,102.11" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="765.46,104.85 765.46,102.11" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="732.24,104.85 732.24,102.11" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="699.02,104.85 699.02,102.11" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="665.79,104.85 665.79,102.11" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <g><text x="798.68" y="113.33" text-anchor="middle" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.00</text></g>
 <g><text x="765.46" y="113.33" text-anchor="middle" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.25</text></g>
 <g><text x="732.24" y="113.33" text-anchor="middle" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.50</text></g>
 <g><text x="699.02" y="113.33" text-anchor="middle" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.75</text></g>
 <g><text x="665.79" y="113.33" text-anchor="middle" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">1.00</text></g>
 <g><text x="732.24" y="126.04" text-anchor="middle" style="font-family: Arimo;font-size: 11.00px;" textLength="104.64px" lengthAdjust="spacingAndGlyphs">Column proportions</text></g>
 </g><g clip-path="url(#c21)">
 <rect x="64.90" y="141.63" width="219.26" height="109.63" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
 <rect x="64.90" y="141.63" width="219.26" height="2.45" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #1F78B4;"/>
 <rect x="64.90" y="144.08" width="219.26" height="107.18" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6CEE3;"/>
 <g><text x="174.53" y="201.61" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.98</text></g>
 </g><g clip-path="url(#c22)">
 <polyline points="64.90,251.26 64.90,141.63" style="stroke-width: 1.07;stroke-linecap: butt;"/>
 <g><text x="59.97" y="254.41" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.00</text></g>
 <g><text x="59.97" y="227.00" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.25</text></g>
 <g><text x="59.97" y="199.59" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.50</text></g>
 <g><text x="59.97" y="172.18" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.75</text></g>
 <g><text x="59.97" y="144.78" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">1.00</text></g>
 <polyline points="62.16,251.26 64.90,251.26" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="62.16,223.86 64.90,223.86" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="62.16,196.45 64.90,196.45" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="62.16,169.04 64.90,169.04" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="62.16,141.63 64.90,141.63" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <g><text transform="translate(37.14,196.45) rotate(-90.00)" text-anchor="middle" style="font-family: Arimo;font-size: 11.00px;" textLength="86.46px" lengthAdjust="spacingAndGlyphs">Row proportions</text></g>
 <polyline points="64.90,251.26 284.16,251.26" style="stroke-width: 1.07;stroke-linecap: butt;"/>
 <polyline points="174.53,254.00 174.53,251.26" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <g><text transform="translate(177.68,256.20) rotate(-90.00)" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="137.92px" lengthAdjust="spacingAndGlyphs">Baldock2011_TB+Baldock2011_JN</text></g>
 <rect x="999.10" y="6.12" width="63.50" height="123.13" style="stroke-width: 2.13;stroke: none;fill: #FFFFFF;"/>
 <g><text x="1006.57" y="26.11" style="font-family: Arimo;font-size: 15.00px;" textLength="39.20px" lengthAdjust="spacingAndGlyphs">alpha</text></g>
 <g><image  x="1006.57" y="35.38" width="17.28" height="86.40" preserveAspectRatio="none"  xlink:href="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAEsCAYAAAACUNnVAAAAeUlEQVQ4jcWPSw6AMAhEH2O9/5F1Y2pSARtL4oYA82HggEPsxlWaEM0QmyEkp+uj3WMvFnUTo7vjSRkVOQUEEZDLzBytJ3u/EbrMAHXkYYfzUbxLXyVV1LhMp59UjGhotRrthxhFpim69GVN8OjGR5c4ZKFLYeaEdwL6pgZTBMOKRwAAAABJRU5ErkJggg=="/></g>
 <polyline points="1020.40,121.64 1023.85,121.64" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
 <polyline points="1020.40,100.11 1023.85,100.11" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
 <polyline points="1020.40,78.58 1023.85,78.58" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
 <polyline points="1020.40,57.05 1023.85,57.05" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
 <polyline points="1020.40,35.53 1023.85,35.53" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
 <polyline points="1010.03,121.64 1006.57,121.64" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
 <polyline points="1010.03,100.11 1006.57,100.11" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
 <polyline points="1010.03,78.58 1006.57,78.58" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
 <polyline points="1010.03,57.05 1006.57,57.05" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
 <polyline points="1010.03,35.53 1006.57,35.53" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
 <g><text x="1031.32" y="125.92" style="font-family: Arimo;font-size: 12.00px;" textLength="23.80px" lengthAdjust="spacingAndGlyphs">0.00</text></g>
 <g><text x="1031.32" y="104.39" style="font-family: Arimo;font-size: 12.00px;" textLength="23.80px" lengthAdjust="spacingAndGlyphs">0.25</text></g>
 <g><text x="1031.32" y="82.87" style="font-family: Arimo;font-size: 12.00px;" textLength="23.80px" lengthAdjust="spacingAndGlyphs">0.50</text></g>
 <g><text x="1031.32" y="61.34" style="font-family: Arimo;font-size: 12.00px;" textLength="23.80px" lengthAdjust="spacingAndGlyphs">0.75</text></g>
 <g><text x="1031.32" y="39.81" style="font-family: Arimo;font-size: 12.00px;" textLength="23.80px" lengthAdjust="spacingAndGlyphs">1.00</text></g>
 <rect x="999.10" y="140.21" width="81.55" height="61.50" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
 <g><text x="1004.58" y="154.87" style="font-family: Arimo;font-size: 11.00px;" textLength="70.59px" lengthAdjust="spacingAndGlyphs">Column block</text></g>
 <rect x="1004.58" y="161.67" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
 <rect x="1005.29" y="162.38" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FC8D62;"/>
 <rect x="1004.58" y="178.95" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
 <rect x="1005.29" y="179.66" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #66C2A5;"/>
 <g><text x="1027.34" y="173.45" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">1</text></g>
 <g><text x="1027.34" y="190.73" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">2</text></g>
 <rect x="999.10" y="212.67" width="63.37" height="61.50" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
 <g><text x="1004.58" y="227.32" style="font-family: Arimo;font-size: 11.00px;" textLength="52.41px" lengthAdjust="spacingAndGlyphs">Row block</text></g>
 <rect x="1004.58" y="234.12" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
 <rect x="1005.29" y="234.83" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #1F78B4;"/>
 <rect x="1004.58" y="251.40" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
 <rect x="1005.29" y="252.11" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6CEE3;"/>
 <g><text x="1027.34" y="245.91" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">1</text></g>
 <g><text x="1027.34" y="263.19" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">2</text></g>
 </g><g clip-path="url(#c31)">
 <rect x="320.63" y="530.25" width="767.49" height="247.47" style="stroke-width: 2.13;stroke: #FFFFFF;fill: #FFFFFF;"/>
 </g><g clip-path="url(#c33)">
 <rect x="320.63" y="419.63" width="767.49" height="110.62" style="stroke-width: 1.07;stroke: #FFFFFF;fill: #FFFFFF;"/>
 </g><g clip-path="url(#c35)">
 <rect x="5.48" y="530.25" width="315.15" height="247.47" style="stroke-width: 1.07;stroke: #FFFFFF;fill: #FFFFFF;"/>
 </g><g clip-path="url(#c37)">
 <rect x="628.21" y="537.72" width="182.72" height="109.63" style="stroke-width: 2.13;stroke: none;fill: #FFFFFF;"/>
 <polyline points="628.21,537.72 810.93,537.72" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="628.21,574.27 810.93,574.27" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="628.21,610.81 810.93,610.81" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="628.21,647.35 810.93,647.35" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="628.21,647.35 628.21,537.72" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="664.76,647.35 664.76,537.72" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="701.30,647.35 701.30,537.72" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="737.84,647.35 737.84,537.72" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="774.39,647.35 774.39,537.72" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="810.93,647.35 810.93,537.72" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="628.21,555.99 810.93,555.99" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="628.21,592.54 810.93,592.54" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="628.21,629.08 810.93,629.08" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="646.48,647.35 646.48,537.72" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="683.03,647.35 683.03,537.72" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="719.57,647.35 719.57,537.72" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="756.11,647.35 756.11,537.72" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="792.66,647.35 792.66,537.72" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <rect x="628.21" y="537.72" width="36.54" height="36.54" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFBCA7;"/>
 <rect x="664.76" y="537.72" width="36.54" height="36.54" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFB29A;"/>
 <rect x="701.30" y="537.72" width="36.54" height="36.54" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFBBA5;"/>
 <rect x="737.84" y="537.72" width="36.54" height="36.54" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFF6F2;"/>
 <rect x="774.39" y="537.72" width="36.54" height="36.54" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFD3C4;"/>
 <rect x="628.21" y="574.27" width="36.54" height="36.54" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FF9B7E;"/>
 <rect x="664.76" y="574.27" width="36.54" height="36.54" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFC9B6;"/>
 <rect x="701.30" y="574.27" width="36.54" height="36.54" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFDFD3;"/>
 <rect x="737.84" y="574.27" width="36.54" height="36.54" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFF8F5;"/>
 <rect x="774.39" y="574.27" width="36.54" height="36.54" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFFAF8;"/>
 <rect x="628.21" y="610.81" width="36.54" height="36.54" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFDBCF;"/>
 <rect x="664.76" y="610.81" width="36.54" height="36.54" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFF2ED;"/>
 <rect x="701.30" y="610.81" width="36.54" height="36.54" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFFCFB;"/>
 <rect x="737.84" y="610.81" width="36.54" height="36.54" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFFEFD;"/>
 <rect x="774.39" y="610.81" width="36.54" height="36.54" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFFFFE;"/>
 <line x1="628.21" y1="574.27" x2="810.93" y2="574.27" style="stroke-width: 1.07;stroke-linecap: butt;"/>
 <line x1="628.21" y1="610.81" x2="810.93" y2="610.81" style="stroke-width: 1.07;stroke-linecap: butt;"/>
 <line x1="628.21" y1="647.35" x2="810.93" y2="647.35" style="stroke-width: 1.07;stroke-linecap: butt;"/>
 <line x1="664.76" y1="647.35" x2="664.76" y2="537.72" style="stroke-width: 1.07;stroke-linecap: butt;"/>
 <line x1="701.30" y1="647.35" x2="701.30" y2="537.72" style="stroke-width: 1.07;stroke-linecap: butt;"/>
 <line x1="737.84" y1="647.35" x2="737.84" y2="537.72" style="stroke-width: 1.07;stroke-linecap: butt;"/>
 <line x1="774.39" y1="647.35" x2="774.39" y2="537.72" style="stroke-width: 1.07;stroke-linecap: butt;"/>
 <line x1="810.93" y1="647.35" x2="810.93" y2="537.72" style="stroke-width: 1.07;stroke-linecap: butt;"/>
 <g><text x="646.48" y="559.93" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.35</text></g>
 <g><text x="683.03" y="559.93" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="15.57px" lengthAdjust="spacingAndGlyphs">0.4</text></g>
 <g><text x="719.57" y="559.93" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.35</text></g>
 <g><text x="756.11" y="559.93" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.05</text></g>
 <g><text x="792.66" y="559.93" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.23</text></g>
 <g><text x="646.48" y="596.48" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.51</text></g>
 <g><text x="683.03" y="596.48" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.28</text></g>
 <g><text x="719.57" y="596.48" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.17</text></g>
 <g><text x="756.11" y="596.48" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.04</text></g>
 <g><text x="792.66" y="596.48" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.03</text></g>
 <g><text x="646.48" y="633.02" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.19</text></g>
 <g><text x="683.03" y="633.02" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.07</text></g>
 <g><text x="719.57" y="633.02" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.02</text></g>
 <g><text x="756.11" y="633.02" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.01</text></g>
 <g><text x="792.66" y="633.02" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="6.31px" lengthAdjust="spacingAndGlyphs">0</text></g>
 <rect x="628.21" y="537.72" width="182.72" height="109.63" style="stroke-width: 2.13;stroke: #333333;"/>
 </g><g clip-path="url(#c38)">
 <g><text x="621.49" y="560.28" text-anchor="end" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">1</text></g>
 <g><text x="621.49" y="596.82" text-anchor="end" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">2</text></g>
 <g><text x="621.49" y="633.37" text-anchor="end" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">3</text></g>
 <polyline points="624.48,555.99 628.21,555.99" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="624.48,592.54 628.21,592.54" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="624.48,629.08 628.21,629.08" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="646.48,651.09 646.48,647.35" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="683.03,651.09 683.03,647.35" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="719.57,651.09 719.57,647.35" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="756.11,651.09 756.11,647.35" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="792.66,651.09 792.66,647.35" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
 <g><text x="646.48" y="662.65" text-anchor="middle" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">1</text></g>
 <g><text x="683.03" y="662.65" text-anchor="middle" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">2</text></g>
 <g><text x="719.57" y="662.65" text-anchor="middle" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">3</text></g>
 <g><text x="756.11" y="662.65" text-anchor="middle" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">4</text></g>
 <g><text x="792.66" y="662.65" text-anchor="middle" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">5</text></g>
 </g><g clip-path="url(#c39)">
 <rect x="536.85" y="425.11" width="365.44" height="73.09" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
 <rect x="553.46" y="427.72" width="1.74" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6D854;"/>
 <rect x="553.46" y="445.12" width="6.59" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6D854;"/>
 <rect x="553.46" y="462.53" width="2.27" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6D854;"/>
 <rect x="553.46" y="479.93" width="4.25" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6D854;"/>
 <rect x="555.20" y="427.72" width="11.96" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #E78AC3;"/>
 <rect x="560.05" y="445.12" width="8.82" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #E78AC3;"/>
 <rect x="555.74" y="462.53" width="17.26" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #E78AC3;"/>
 <rect x="557.72" y="479.93" width="16.85" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #E78AC3;"/>
 <rect x="567.16" y="427.72" width="43.23" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #8DA0CB;"/>
 <rect x="568.87" y="445.12" width="31.53" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #8DA0CB;"/>
 <rect x="572.99" y="462.53" width="43.96" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #8DA0CB;"/>
 <rect x="574.56" y="479.93" width="40.32" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #8DA0CB;"/>
 <rect x="610.40" y="427.72" width="122.46" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FC8D62;"/>
 <rect x="600.40" y="445.12" width="130.24" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FC8D62;"/>
 <rect x="616.95" y="462.53" width="122.77" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FC8D62;"/>
 <rect x="614.88" y="479.93" width="104.95" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FC8D62;"/>
 <rect x="732.85" y="427.72" width="152.83" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #66C2A5;"/>
 <rect x="730.63" y="445.12" width="155.05" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #66C2A5;"/>
 <rect x="739.72" y="462.53" width="145.96" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #66C2A5;"/>
 <rect x="719.83" y="479.93" width="165.84" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #66C2A5;"/>
 <g><text x="588.78" y="439.49" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.13</text></g>
 <g><text x="594.97" y="474.30" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.13</text></g>
 <g><text x="594.72" y="491.70" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.12</text></g>
 <g><text x="671.62" y="439.49" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.37</text></g>
 <g><text x="665.51" y="456.89" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.39</text></g>
 <g><text x="678.34" y="474.30" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.37</text></g>
 <g><text x="667.36" y="491.70" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.32</text></g>
 <g><text x="809.26" y="439.49" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.46</text></g>
 <g><text x="808.15" y="456.89" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.47</text></g>
 <g><text x="812.70" y="474.30" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.44</text></g>
 <g><text x="802.76" y="491.70" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="15.57px" lengthAdjust="spacingAndGlyphs">0.5</text></g>
 </g><g clip-path="url(#c40)">
 <polyline points="536.85,498.20 536.85,425.11" style="stroke-width: 1.07;stroke-linecap: butt;"/>
 <g><text x="531.92" y="490.90" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="90.48px" lengthAdjust="spacingAndGlyphs">Baldock2019_Reading</text></g>
 <g><text x="531.92" y="473.50" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="80.74px" lengthAdjust="spacingAndGlyphs">Baldock2019_Leeds</text></g>
 <g><text x="531.92" y="456.10" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="99.91px" lengthAdjust="spacingAndGlyphs">Baldock2019_Edinburgh</text></g>
 <g><text x="531.92" y="438.70" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="83.19px" lengthAdjust="spacingAndGlyphs">Baldock2019_Bristol</text></g>
 <polyline points="534.11,487.76 536.85,487.76" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="534.11,470.36 536.85,470.36" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="534.11,452.96 536.85,452.96" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="534.11,435.55 536.85,435.55" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="536.85,498.20 902.29,498.20" style="stroke-width: 1.07;stroke-linecap: butt;"/>
 <polyline points="885.68,500.94 885.68,498.20" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="802.62,500.94 802.62,498.20" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="719.57,500.94 719.57,498.20" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="636.52,500.94 636.52,498.20" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="553.46,500.94 553.46,498.20" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <g><text x="885.68" y="509.42" text-anchor="middle" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.00</text></g>
 <g><text x="802.62" y="509.42" text-anchor="middle" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.25</text></g>
 <g><text x="719.57" y="509.42" text-anchor="middle" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.50</text></g>
 <g><text x="636.52" y="509.42" text-anchor="middle" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.75</text></g>
 <g><text x="553.46" y="509.42" text-anchor="middle" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">1.00</text></g>
 <g><text x="719.57" y="522.13" text-anchor="middle" style="font-family: Arimo;font-size: 11.00px;" textLength="104.64px" lengthAdjust="spacingAndGlyphs">Column proportions</text></g>
 </g><g clip-path="url(#c41)">
 <rect x="46.58" y="547.78" width="268.57" height="89.52" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
 <rect x="46.58" y="547.78" width="61.98" height="2.49" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #B2DF8A;"/>
 <rect x="115.45" y="547.78" width="61.98" height="4.16" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #B2DF8A;"/>
 <rect x="184.31" y="547.78" width="61.98" height="4.26" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #B2DF8A;"/>
 <rect x="253.17" y="547.78" width="61.98" height="1.60" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #B2DF8A;"/>
 <rect x="46.58" y="550.26" width="61.98" height="8.69" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #1F78B4;"/>
 <rect x="115.45" y="551.94" width="61.98" height="10.41" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #1F78B4;"/>
 <rect x="184.31" y="552.03" width="61.98" height="15.62" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #1F78B4;"/>
 <rect x="253.17" y="549.38" width="61.98" height="12.68" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #1F78B4;"/>
 <rect x="46.58" y="558.95" width="61.98" height="78.35" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6CEE3;"/>
 <rect x="115.45" y="562.35" width="61.98" height="74.95" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6CEE3;"/>
 <rect x="184.31" y="567.65" width="61.98" height="69.65" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6CEE3;"/>
 <rect x="253.17" y="562.06" width="61.98" height="75.24" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6CEE3;"/>
 <g><text x="146.43" y="561.08" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.12</text></g>
 <g><text x="215.30" y="563.78" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.17</text></g>
 <g><text x="284.16" y="559.66" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.14</text></g>
 <g><text x="77.57" y="602.07" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.88</text></g>
 <g><text x="146.43" y="603.76" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.84</text></g>
 <g><text x="215.30" y="606.41" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.78</text></g>
 <g><text x="284.16" y="603.62" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.84</text></g>
 </g><g clip-path="url(#c42)">
 <polyline points="46.58,637.30 46.58,547.78" style="stroke-width: 1.07;stroke-linecap: butt;"/>
 <g><text x="41.65" y="640.44" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.00</text></g>
 <g><text x="41.65" y="618.06" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.25</text></g>
 <g><text x="41.65" y="595.68" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.50</text></g>
 <g><text x="41.65" y="573.30" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.75</text></g>
 <g><text x="41.65" y="550.92" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">1.00</text></g>
 <polyline points="43.84,637.30 46.58,637.30" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="43.84,614.92 46.58,614.92" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="43.84,592.54 46.58,592.54" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="43.84,570.16 46.58,570.16" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="43.84,547.78 46.58,547.78" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <g><text transform="translate(18.81,592.54) rotate(-90.00)" text-anchor="middle" style="font-family: Arimo;font-size: 11.00px;" textLength="86.46px" lengthAdjust="spacingAndGlyphs">Row proportions</text></g>
 <polyline points="46.58,637.30 315.15,637.30" style="stroke-width: 1.07;stroke-linecap: butt;"/>
 <polyline points="77.57,640.04 77.57,637.30" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="146.43,640.04 146.43,637.30" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="215.30,640.04 215.30,637.30" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="284.16,640.04 284.16,637.30" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <g><text transform="translate(80.71,642.23) rotate(-90.00)" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="83.19px" lengthAdjust="spacingAndGlyphs">Baldock2019_Bristol</text></g>
 <g><text transform="translate(149.58,642.23) rotate(-90.00)" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="99.91px" lengthAdjust="spacingAndGlyphs">Baldock2019_Edinburgh</text></g>
 <g><text transform="translate(218.44,642.23) rotate(-90.00)" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="80.74px" lengthAdjust="spacingAndGlyphs">Baldock2019_Leeds</text></g>
 <g><text transform="translate(287.31,642.23) rotate(-90.00)" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="90.48px" lengthAdjust="spacingAndGlyphs">Baldock2019_Reading</text></g>
 <rect x="999.10" y="393.57" width="63.50" height="123.13" style="stroke-width: 2.13;stroke: none;fill: #FFFFFF;"/>
 <g><text x="1006.57" y="413.56" style="font-family: Arimo;font-size: 15.00px;" textLength="39.20px" lengthAdjust="spacingAndGlyphs">alpha</text></g>
 <g><image  x="1006.57" y="422.83" width="17.28" height="86.40" preserveAspectRatio="none"  xlink:href="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAEsCAYAAAACUNnVAAAAeUlEQVQ4jcWPSw6AMAhEH2O9/5F1Y2pSARtL4oYA82HggEPsxlWaEM0QmyEkp+uj3WMvFnUTo7vjSRkVOQUEEZDLzBytJ3u/EbrMAHXkYYfzUbxLXyVV1LhMp59UjGhotRrthxhFpim69GVN8OjGR5c4ZKFLYeaEdwL6pgZTBMOKRwAAAABJRU5ErkJggg=="/></g>
 <polyline points="1020.40,509.09 1023.85,509.09" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
 <polyline points="1020.40,487.56 1023.85,487.56" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
 <polyline points="1020.40,466.03 1023.85,466.03" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
 <polyline points="1020.40,444.50 1023.85,444.50" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
 <polyline points="1020.40,422.97 1023.85,422.97" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
 <polyline points="1010.03,509.09 1006.57,509.09" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
 <polyline points="1010.03,487.56 1006.57,487.56" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
 <polyline points="1010.03,466.03 1006.57,466.03" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
 <polyline points="1010.03,444.50 1006.57,444.50" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
 <polyline points="1010.03,422.97 1006.57,422.97" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
 <g><text x="1031.32" y="513.37" style="font-family: Arimo;font-size: 12.00px;" textLength="23.80px" lengthAdjust="spacingAndGlyphs">0.00</text></g>
 <g><text x="1031.32" y="491.84" style="font-family: Arimo;font-size: 12.00px;" textLength="23.80px" lengthAdjust="spacingAndGlyphs">0.25</text></g>
 <g><text x="1031.32" y="470.31" style="font-family: Arimo;font-size: 12.00px;" textLength="23.80px" lengthAdjust="spacingAndGlyphs">0.50</text></g>
 <g><text x="1031.32" y="448.79" style="font-family: Arimo;font-size: 12.00px;" textLength="23.80px" lengthAdjust="spacingAndGlyphs">0.75</text></g>
 <g><text x="1031.32" y="427.26" style="font-family: Arimo;font-size: 12.00px;" textLength="23.80px" lengthAdjust="spacingAndGlyphs">1.00</text></g>
 <rect x="999.10" y="527.66" width="81.55" height="78.78" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
 <g><text x="1004.58" y="542.32" style="font-family: Arimo;font-size: 11.00px;" textLength="70.59px" lengthAdjust="spacingAndGlyphs">Column block</text></g>
 <rect x="1004.58" y="549.12" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
 <rect x="1005.29" y="549.83" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6D854;"/>
 <rect x="1037.85" y="549.12" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
 <rect x="1038.56" y="549.83" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #E78AC3;"/>
 <rect x="1004.58" y="566.40" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
 <rect x="1005.29" y="567.11" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #8DA0CB;"/>
 <rect x="1037.85" y="566.40" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
 <rect x="1038.56" y="567.11" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FC8D62;"/>
 <rect x="1004.58" y="583.68" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
 <rect x="1005.29" y="584.39" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #66C2A5;"/>
 <g><text x="1027.34" y="560.90" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">1</text></g>
 <g><text x="1060.61" y="560.90" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">2</text></g>
 <g><text x="1027.34" y="578.18" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">3</text></g>
 <g><text x="1060.61" y="578.18" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">4</text></g>
 <g><text x="1027.34" y="595.46" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">5</text></g>
 <rect x="999.10" y="617.40" width="72.02" height="61.50" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
 <g><text x="1004.58" y="632.05" style="font-family: Arimo;font-size: 11.00px;" textLength="52.41px" lengthAdjust="spacingAndGlyphs">Row block</text></g>
 <rect x="1004.58" y="638.85" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
 <rect x="1005.29" y="639.56" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #B2DF8A;"/>
 <rect x="1037.85" y="638.85" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
 <rect x="1038.56" y="639.56" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #1F78B4;"/>
 <rect x="1004.58" y="656.13" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
 <rect x="1005.29" y="656.84" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6CEE3;"/>
 <g><text x="1027.34" y="650.64" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">1</text></g>
 <g><text x="1060.61" y="650.64" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">2</text></g>
 <g><text x="1027.34" y="667.92" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">3</text></g>
 <g><text x="545.80" y="14.90" text-anchor="middle" style="font-family: Arimo;font-size: 13.20px;" textLength="83.92px" lengthAdjust="spacingAndGlyphs">Best partition</text></g>
 </g>
 </svg>
--- a/suivi/2025-18/figs/partition-pirho.svg
+++ b/suivi/2025-18/figs/partition-pirho.svg
@ -0,0 +1,272 @@
 <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" class="httpgd" width="1483.00" height="992.00" viewBox="0 0 1186.40 793.60">
 <defs>
  <style type='text/css'><![CDATA[
    .httpgd line, .httpgd polyline, .httpgd polygon, .httpgd path, .httpgd rect, .httpgd circle {
      fill: none;
      stroke: #000000;
      stroke-linecap: round;
      stroke-linejoin: round;
      stroke-miterlimit: 10.00;
    }
  ]]></style>
 <clipPath id="c0"><rect x="0.00" y="0.00" width="1186.40" height="793.60"/></clipPath>
 <clipPath id="c1"><rect x="46.58" y="10.96" width="1034.36" height="608.79"/></clipPath>
 <clipPath id="c2"><rect x="0.00" y="0.00" width="1186.40" height="793.60"/></clipPath>
 <clipPath id="c3"><rect x="338.90" y="270.72" width="842.02" height="517.40"/></clipPath>
 <clipPath id="c4"><rect x="0.00" y="0.00" width="1186.40" height="793.60"/></clipPath>
 <clipPath id="c5"><rect x="338.90" y="5.48" width="842.02" height="265.24"/></clipPath>
 <clipPath id="c6"><rect x="0.00" y="0.00" width="1186.40" height="793.60"/></clipPath>
 <clipPath id="c7"><rect x="5.48" y="270.72" width="333.42" height="517.40"/></clipPath>
 <clipPath id="c8"><rect x="0.00" y="0.00" width="1186.40" height="793.60"/></clipPath>
 <clipPath id="c9"><rect x="509.47" y="278.19" width="569.27" height="341.56"/></clipPath>
 <clipPath id="c10"><rect x="0.00" y="0.00" width="1186.40" height="793.60"/></clipPath>
 <clipPath id="c11"><rect x="507.27" y="67.45" width="573.68" height="114.74"/></clipPath>
 <clipPath id="c12"><rect x="0.00" y="0.00" width="1186.40" height="793.60"/></clipPath>
 <clipPath id="c13"><rect x="46.58" y="401.16" width="286.84" height="95.61"/></clipPath>
 <clipPath id="c14"><rect x="0.00" y="0.00" width="1186.40" height="793.60"/></clipPath>
 </defs>
 <rect width="100%" height="100%" style="stroke: none;fill: #FFFFFF;"/>
 <g clip-path="url(#c0)">
 <rect x="-0.00" y="0.00" width="1186.40" height="793.60" style="stroke-width: 1.07;stroke: #FFFFFF;fill: #FFFFFF;"/>
 </g><g clip-path="url(#c3)">
 <rect x="338.90" y="270.72" width="842.02" height="517.40" style="stroke-width: 2.13;stroke: #FFFFFF;fill: #FFFFFF;"/>
 </g><g clip-path="url(#c5)">
 <rect x="338.90" y="5.48" width="842.02" height="265.24" style="stroke-width: 1.07;stroke: #FFFFFF;fill: #FFFFFF;"/>
 </g><g clip-path="url(#c7)">
 <rect x="5.48" y="270.72" width="333.42" height="517.40" style="stroke-width: 1.07;stroke: #FFFFFF;fill: #FFFFFF;"/>
 </g><g clip-path="url(#c9)">
 <rect x="509.47" y="278.19" width="569.27" height="341.56" style="stroke-width: 2.13;stroke: none;fill: #FFFFFF;"/>
 <polyline points="509.47,278.19 1078.74,278.19" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="509.47,392.04 1078.74,392.04" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="509.47,505.90 1078.74,505.90" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="509.47,619.75 1078.74,619.75" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="509.47,619.75 509.47,278.19" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="623.32,619.75 623.32,278.19" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="737.18,619.75 737.18,278.19" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="851.03,619.75 851.03,278.19" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="964.89,619.75 964.89,278.19" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="1078.74,619.75 1078.74,278.19" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="509.47,335.12 1078.74,335.12" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="509.47,448.97 1078.74,448.97" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="509.47,562.82 1078.74,562.82" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="566.40,619.75 566.40,278.19" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="680.25,619.75 680.25,278.19" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="794.10,619.75 794.10,278.19" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="907.96,619.75 907.96,278.19" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <polyline points="1021.81,619.75 1021.81,278.19" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
 <rect x="509.47" y="278.19" width="113.85" height="113.85" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFC1AC;"/>
 <rect x="623.32" y="278.19" width="113.85" height="113.85" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFA98E;"/>
 <rect x="737.18" y="278.19" width="113.85" height="113.85" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFBAA4;"/>
 <rect x="851.03" y="278.19" width="113.85" height="113.85" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFECE5;"/>
 <rect x="964.89" y="278.19" width="113.85" height="113.85" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFBBA5;"/>
 <rect x="509.47" y="392.04" width="113.85" height="113.85" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FF9A7D;"/>
 <rect x="623.32" y="392.04" width="113.85" height="113.85" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFC6B3;"/>
 <rect x="737.18" y="392.04" width="113.85" height="113.85" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFDDD0;"/>
 <rect x="851.03" y="392.04" width="113.85" height="113.85" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFF8F5;"/>
 <rect x="964.89" y="392.04" width="113.85" height="113.85" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFFAF9;"/>
 <rect x="509.47" y="505.90" width="113.85" height="113.85" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFDBCF;"/>
 <rect x="623.32" y="505.90" width="113.85" height="113.85" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFF2ED;"/>
 <rect x="737.18" y="505.90" width="113.85" height="113.85" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFFCFB;"/>
 <rect x="851.03" y="505.90" width="113.85" height="113.85" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFFEFD;"/>
 <rect x="964.89" y="505.90" width="113.85" height="113.85" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFFFFF;"/>
 <line x1="509.47" y1="392.04" x2="1078.74" y2="392.04" style="stroke-width: 1.07;stroke-linecap: butt;"/>
 <line x1="509.47" y1="505.90" x2="1078.74" y2="505.90" style="stroke-width: 1.07;stroke-linecap: butt;"/>
 <line x1="509.47" y1="619.75" x2="1078.74" y2="619.75" style="stroke-width: 1.07;stroke-linecap: butt;"/>
 <line x1="623.32" y1="619.75" x2="623.32" y2="278.19" style="stroke-width: 1.07;stroke-linecap: butt;"/>
 <line x1="737.18" y1="619.75" x2="737.18" y2="278.19" style="stroke-width: 1.07;stroke-linecap: butt;"/>
 <line x1="851.03" y1="619.75" x2="851.03" y2="278.19" style="stroke-width: 1.07;stroke-linecap: butt;"/>
 <line x1="964.89" y1="619.75" x2="964.89" y2="278.19" style="stroke-width: 1.07;stroke-linecap: butt;"/>
 <line x1="1078.74" y1="619.75" x2="1078.74" y2="278.19" style="stroke-width: 1.07;stroke-linecap: butt;"/>
 <g><text x="566.40" y="339.05" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.32</text></g>
 <g><text x="680.25" y="339.05" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.45</text></g>
 <g><text x="794.10" y="339.05" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.36</text></g>
 <g><text x="907.96" y="339.05" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="15.57px" lengthAdjust="spacingAndGlyphs">0.1</text></g>
 <g><text x="1021.81" y="339.05" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.35</text></g>
 <g><text x="566.40" y="452.91" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.52</text></g>
 <g><text x="680.25" y="452.91" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="15.57px" lengthAdjust="spacingAndGlyphs">0.3</text></g>
 <g><text x="794.10" y="452.91" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.18</text></g>
 <g><text x="907.96" y="452.91" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.04</text></g>
 <g><text x="1021.81" y="452.91" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.02</text></g>
 <g><text x="566.40" y="566.76" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.19</text></g>
 <g><text x="680.25" y="566.76" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.07</text></g>
 <g><text x="794.10" y="566.76" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.02</text></g>
 <g><text x="907.96" y="566.76" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.01</text></g>
 <g><text x="1021.81" y="566.76" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="6.31px" lengthAdjust="spacingAndGlyphs">0</text></g>
 <rect x="509.47" y="278.19" width="569.27" height="341.56" style="stroke-width: 2.13;stroke: #333333;"/>
 </g><g clip-path="url(#c10)">
 <g><text x="502.74" y="339.40" text-anchor="end" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">1</text></g>
 <g><text x="502.74" y="453.25" text-anchor="end" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">2</text></g>
 <g><text x="502.74" y="567.11" text-anchor="end" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">3</text></g>
 <polyline points="505.73,335.12 509.47,335.12" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="505.73,448.97 509.47,448.97" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="505.73,562.82 509.47,562.82" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="566.40,623.49 566.40,619.75" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="680.25,623.49 680.25,619.75" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="794.10,623.49 794.10,619.75" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="907.96,623.49 907.96,619.75" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="1021.81,623.49 1021.81,619.75" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
 <g><text x="566.40" y="635.04" text-anchor="middle" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">1</text></g>
 <g><text x="680.25" y="635.04" text-anchor="middle" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">2</text></g>
 <g><text x="794.10" y="635.04" text-anchor="middle" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">3</text></g>
 <g><text x="907.96" y="635.04" text-anchor="middle" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">4</text></g>
 <g><text x="1021.81" y="635.04" text-anchor="middle" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">5</text></g>
 </g><g clip-path="url(#c11)">
 <rect x="507.27" y="67.45" width="573.68" height="114.74" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
 <rect x="533.34" y="70.76" width="0.00" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6D854;"/>
 <rect x="533.34" y="92.82" width="2.72" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6D854;"/>
 <rect x="533.34" y="114.88" width="11.11" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6D854;"/>
 <rect x="533.34" y="136.95" width="3.40" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6D854;"/>
 <rect x="533.34" y="159.01" width="6.61" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6D854;"/>
 <rect x="533.34" y="70.76" width="12.01" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #E78AC3;"/>
 <rect x="536.06" y="92.82" width="18.08" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #E78AC3;"/>
 <rect x="544.45" y="114.88" width="12.26" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #E78AC3;"/>
 <rect x="536.74" y="136.95" width="26.92" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #E78AC3;"/>
 <rect x="539.95" y="159.01" width="25.23" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #E78AC3;"/>
 <rect x="545.35" y="70.76" width="11.80" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #8DA0CB;"/>
 <rect x="554.14" y="92.82" width="68.70" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #8DA0CB;"/>
 <rect x="556.71" y="114.88" width="38.34" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #8DA0CB;"/>
 <rect x="563.67" y="136.95" width="68.82" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #8DA0CB;"/>
 <rect x="565.19" y="159.01" width="67.52" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #8DA0CB;"/>
 <rect x="557.15" y="70.76" width="47.64" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FC8D62;"/>
 <rect x="622.85" y="92.82" width="317.75" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FC8D62;"/>
 <rect x="595.06" y="114.88" width="388.85" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FC8D62;"/>
 <rect x="632.49" y="136.95" width="328.64" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FC8D62;"/>
 <rect x="632.71" y="159.01" width="216.63" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FC8D62;"/>
 <rect x="604.79" y="70.76" width="450.07" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #66C2A5;"/>
 <rect x="940.60" y="92.82" width="114.27" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #66C2A5;"/>
 <rect x="983.90" y="114.88" width="70.96" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #66C2A5;"/>
 <rect x="961.14" y="136.95" width="93.73" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #66C2A5;"/>
 <rect x="849.35" y="159.01" width="205.52" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #66C2A5;"/>
 <g><text x="588.49" y="106.69" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.13</text></g>
 <g><text x="598.08" y="150.82" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.13</text></g>
 <g><text x="598.95" y="172.88" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.13</text></g>
 <g><text x="781.72" y="106.69" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.61</text></g>
 <g><text x="789.48" y="128.75" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.75</text></g>
 <g><text x="796.81" y="150.82" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.63</text></g>
 <g><text x="741.03" y="172.88" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.42</text></g>
 <g><text x="829.83" y="84.62" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.86</text></g>
 <g><text x="997.73" y="106.69" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.22</text></g>
 <g><text x="1019.39" y="128.75" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.14</text></g>
 <g><text x="1008.00" y="150.82" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.18</text></g>
 <g><text x="952.11" y="172.88" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.39</text></g>
 </g><g clip-path="url(#c12)">
 <polyline points="507.27,182.18 507.27,67.45" style="stroke-width: 1.07;stroke-linecap: butt;"/>
 <g><text x="502.34" y="172.08" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="90.48px" lengthAdjust="spacingAndGlyphs">Baldock2019_Reading</text></g>
 <g><text x="502.34" y="150.02" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="80.74px" lengthAdjust="spacingAndGlyphs">Baldock2019_Leeds</text></g>
 <g><text x="502.34" y="127.96" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="99.91px" lengthAdjust="spacingAndGlyphs">Baldock2019_Edinburgh</text></g>
 <g><text x="502.34" y="105.89" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="83.19px" lengthAdjust="spacingAndGlyphs">Baldock2019_Bristol</text></g>
 <g><text x="502.34" y="83.83" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="137.92px" lengthAdjust="spacingAndGlyphs">Baldock2011_TB+Baldock2011_JN</text></g>
 <polyline points="504.53,168.94 507.27,168.94" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="504.53,146.88 507.27,146.88" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="504.53,124.81 507.27,124.81" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="504.53,102.75 507.27,102.75" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="504.53,80.68 507.27,80.68" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="507.27,182.18 1080.94,182.18" style="stroke-width: 1.07;stroke-linecap: butt;"/>
 <polyline points="1054.87,184.92 1054.87,182.18" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="924.49,184.92 924.49,182.18" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="794.10,184.92 794.10,182.18" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="663.72,184.92 663.72,182.18" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="533.34,184.92 533.34,182.18" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <g><text x="1054.87" y="193.40" text-anchor="middle" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.00</text></g>
 <g><text x="924.49" y="193.40" text-anchor="middle" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.25</text></g>
 <g><text x="794.10" y="193.40" text-anchor="middle" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.50</text></g>
 <g><text x="663.72" y="193.40" text-anchor="middle" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.75</text></g>
 <g><text x="533.34" y="193.40" text-anchor="middle" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">1.00</text></g>
 <g><text x="794.10" y="206.11" text-anchor="middle" style="font-family: Arimo;font-size: 11.00px;" textLength="104.64px" lengthAdjust="spacingAndGlyphs">Column proportions</text></g>
 </g><g clip-path="url(#c13)">
 <rect x="46.58" y="401.16" width="286.84" height="95.61" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
 <rect x="46.58" y="401.16" width="52.68" height="1.59" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #B2DF8A;"/>
 <rect x="105.12" y="401.16" width="52.68" height="2.13" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #B2DF8A;"/>
 <rect x="163.66" y="401.16" width="52.68" height="4.46" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #B2DF8A;"/>
 <rect x="222.20" y="401.16" width="52.68" height="3.80" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #B2DF8A;"/>
 <rect x="280.73" y="401.16" width="52.68" height="1.15" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #B2DF8A;"/>
 <rect x="46.58" y="402.76" width="52.68" height="94.02" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #1F78B4;"/>
 <rect x="105.12" y="403.29" width="52.68" height="9.23" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #1F78B4;"/>
 <rect x="163.66" y="405.62" width="52.68" height="10.37" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #1F78B4;"/>
 <rect x="222.20" y="404.96" width="52.68" height="17.41" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #1F78B4;"/>
 <rect x="280.73" y="402.31" width="52.68" height="14.17" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #1F78B4;"/>
 <rect x="46.58" y="496.78" width="52.68" height="0.00" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6CEE3;"/>
 <rect x="105.12" y="412.52" width="52.68" height="84.25" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6CEE3;"/>
 <rect x="163.66" y="416.00" width="52.68" height="80.78" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6CEE3;"/>
 <rect x="222.20" y="422.37" width="52.68" height="74.41" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6CEE3;"/>
 <rect x="280.73" y="416.48" width="52.68" height="80.30" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6CEE3;"/>
 <g><text x="72.92" y="453.71" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.98</text></g>
 <g><text x="190.00" y="414.75" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.11</text></g>
 <g><text x="248.54" y="417.60" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.18</text></g>
 <g><text x="307.08" y="413.33" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.15</text></g>
 <g><text x="131.46" y="458.59" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.88</text></g>
 <g><text x="190.00" y="460.33" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.84</text></g>
 <g><text x="248.54" y="463.51" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.78</text></g>
 <g><text x="307.08" y="460.57" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.84</text></g>
 </g><g clip-path="url(#c14)">
 <polyline points="46.58,496.78 46.58,401.16" style="stroke-width: 1.07;stroke-linecap: butt;"/>
 <g><text x="41.65" y="499.92" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.00</text></g>
 <g><text x="41.65" y="476.02" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.25</text></g>
 <g><text x="41.65" y="452.11" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.50</text></g>
 <g><text x="41.65" y="428.21" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.75</text></g>
 <g><text x="41.65" y="404.31" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">1.00</text></g>
 <polyline points="43.84,496.78 46.58,496.78" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="43.84,472.87 46.58,472.87" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="43.84,448.97 46.58,448.97" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="43.84,425.07 46.58,425.07" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="43.84,401.16 46.58,401.16" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <g><text transform="translate(18.81,448.97) rotate(-90.00)" text-anchor="middle" style="font-family: Arimo;font-size: 11.00px;" textLength="86.46px" lengthAdjust="spacingAndGlyphs">Row proportions</text></g>
 <polyline points="46.58,496.78 333.42,496.78" style="stroke-width: 1.07;stroke-linecap: butt;"/>
 <polyline points="72.92,499.52 72.92,496.78" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="131.46,499.52 131.46,496.78" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="190.00,499.52 190.00,496.78" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="248.54,499.52 248.54,496.78" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <polyline points="307.08,499.52 307.08,496.78" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
 <g><text transform="translate(76.07,501.71) rotate(-90.00)" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="137.92px" lengthAdjust="spacingAndGlyphs">Baldock2011_TB+Baldock2011_JN</text></g>
 <g><text transform="translate(134.60,501.71) rotate(-90.00)" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="83.19px" lengthAdjust="spacingAndGlyphs">Baldock2019_Bristol</text></g>
 <g><text transform="translate(193.14,501.71) rotate(-90.00)" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="99.91px" lengthAdjust="spacingAndGlyphs">Baldock2019_Edinburgh</text></g>
 <g><text transform="translate(251.68,501.71) rotate(-90.00)" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="80.74px" lengthAdjust="spacingAndGlyphs">Baldock2019_Leeds</text></g>
 <g><text transform="translate(310.22,501.71) rotate(-90.00)" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="90.48px" lengthAdjust="spacingAndGlyphs">Baldock2019_Reading</text></g>
 <rect x="1091.90" y="172.70" width="63.50" height="123.13" style="stroke-width: 2.13;stroke: none;fill: #FFFFFF;"/>
 <g><text x="1099.37" y="192.68" style="font-family: Arimo;font-size: 15.00px;" textLength="39.20px" lengthAdjust="spacingAndGlyphs">alpha</text></g>
 <g><image  x="1099.37" y="201.95" width="17.28" height="86.40" preserveAspectRatio="none"  xlink:href="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAEsCAYAAAACUNnVAAAAeUlEQVQ4jcWPSw6AMAhEH2O9/5F1Y2pSARtL4oYA82HggEPsxlWaEM0QmyEkp+uj3WMvFnUTo7vjSRkVOQUEEZDLzBytJ3u/EbrMAHXkYYfzUbxLXyVV1LhMp59UjGhotRrthxhFpim69GVN8OjGR5c4ZKFLYeaEdwL6pgZTBMOKRwAAAABJRU5ErkJggg=="/></g>
 <polyline points="1113.20,288.21 1116.65,288.21" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
 <polyline points="1113.20,266.68 1116.65,266.68" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
 <polyline points="1113.20,245.15 1116.65,245.15" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
 <polyline points="1113.20,223.63 1116.65,223.63" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
 <polyline points="1113.20,202.10 1116.65,202.10" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
 <polyline points="1102.83,288.21 1099.37,288.21" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
 <polyline points="1102.83,266.68 1099.37,266.68" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
 <polyline points="1102.83,245.15 1099.37,245.15" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
 <polyline points="1102.83,223.63 1099.37,223.63" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
 <polyline points="1102.83,202.10 1099.37,202.10" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
 <g><text x="1124.12" y="292.49" style="font-family: Arimo;font-size: 12.00px;" textLength="23.80px" lengthAdjust="spacingAndGlyphs">0.00</text></g>
 <g><text x="1124.12" y="270.97" style="font-family: Arimo;font-size: 12.00px;" textLength="23.80px" lengthAdjust="spacingAndGlyphs">0.25</text></g>
 <g><text x="1124.12" y="249.44" style="font-family: Arimo;font-size: 12.00px;" textLength="23.80px" lengthAdjust="spacingAndGlyphs">0.50</text></g>
 <g><text x="1124.12" y="227.91" style="font-family: Arimo;font-size: 12.00px;" textLength="23.80px" lengthAdjust="spacingAndGlyphs">0.75</text></g>
 <g><text x="1124.12" y="206.38" style="font-family: Arimo;font-size: 12.00px;" textLength="23.80px" lengthAdjust="spacingAndGlyphs">1.00</text></g>
 <rect x="1091.90" y="306.78" width="81.55" height="78.78" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
 <g><text x="1097.38" y="321.44" style="font-family: Arimo;font-size: 11.00px;" textLength="70.59px" lengthAdjust="spacingAndGlyphs">Column block</text></g>
 <rect x="1097.38" y="328.24" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
 <rect x="1098.09" y="328.95" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6D854;"/>
 <rect x="1130.65" y="328.24" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
 <rect x="1131.36" y="328.95" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #E78AC3;"/>
 <rect x="1097.38" y="345.52" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
 <rect x="1098.09" y="346.23" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #8DA0CB;"/>
 <rect x="1130.65" y="345.52" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
 <rect x="1131.36" y="346.23" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FC8D62;"/>
 <rect x="1097.38" y="362.80" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
 <rect x="1098.09" y="363.51" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #66C2A5;"/>
 <g><text x="1120.14" y="340.02" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">1</text></g>
 <g><text x="1153.41" y="340.02" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">2</text></g>
 <g><text x="1120.14" y="357.30" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">3</text></g>
 <g><text x="1153.41" y="357.30" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">4</text></g>
 <g><text x="1120.14" y="374.58" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">5</text></g>
 <rect x="1091.90" y="396.52" width="72.02" height="61.50" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
 <g><text x="1097.38" y="411.17" style="font-family: Arimo;font-size: 11.00px;" textLength="52.41px" lengthAdjust="spacingAndGlyphs">Row block</text></g>
 <rect x="1097.38" y="417.98" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
 <rect x="1098.09" y="418.68" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #B2DF8A;"/>
 <rect x="1130.65" y="417.98" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
 <rect x="1131.36" y="418.68" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #1F78B4;"/>
 <rect x="1097.38" y="435.26" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
 <rect x="1098.09" y="435.96" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6CEE3;"/>
 <g><text x="1120.14" y="429.76" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">1</text></g>
 <g><text x="1153.41" y="429.76" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">2</text></g>
 <g><text x="1120.14" y="447.04" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">3</text></g>
 </g>
 </svg>
--- a/suivi/2025-19/2025-19.qmd
+++ b/suivi/2025-19/2025-19.qmd
@ -0,0 +1,156 @@
 ---
 title: "Bilan semaine 19 2025 : 5 mai - 9 mai"
 categories: [colBiSBM, inférence]
 date: 2025 05 09
 ---
 ## TOP PRIORITÉ
 - Débugguer les simulations :
    - Clustering : Relancer simulations de clustering avec $M = 30$ où $M_i = 10, \forall i$. En attente retour MIGALE
    Relancer simus clustering avec VEM steps = 10 000 et plus nombreux init pour spectral. Ajouter simu clustering métriques nb sous-collections obtenues. 
    Vérifier les résultats obtenus si ARI = 0. Et augmenter la taille $M = 30$ avec $M_1 = M_2 = M_3 = 10$. ~~-> BUG, dois creuser mais juste des problèmes techniques.~~
     Le bug venait probablement d'une inadéquation entre la version de *future* et *future.callr*, les résultats temporaires sont encourageants.
    - Inférence : Relancer simus d'inférence avec n = 240 pour voir si la qualité augmente (se rassurer). En fait on est déjà à 240, j'ai relancé avec M = 4 au lieu de M = 2.
    En attente résultats MIGALE -> BUG, dois creuser mais juste des problèmes techniques -> Visiblement il y a d'autres problèmes que juste le plan de parallélisation.
 ### Applications
 - Kmeans sur la densité des réseaux subdoré pour pré-partitionner et *clusteriser*.
 Car densités déséquilibrées.
 :::{#ref-kmeans-vae}
 - Faire GNN-VAE Doré et sub-Doré avec kmeans et clustering sur l'espace latent
 J'ai commencé à regarder un peu
 :::
 ### Présentations LSD, JdS et ML@Aussois
 - ~~PRÉSENTATION JDS (LSD), durée introuvable, adapter en anglais les slides~~ et voir avec PB et SD.
 - Quel plan ?
 - Quels résultats ? Baldock, Traveset ... (sub-Doré)
 ### Inférence et microbes
 - Lire Papiers compositional data (Aitchison et al. intro)
 - Se renseigner techniques d'inférence de réseaux : 
    - covariance (base corrélation et seuil)
    - GraphicalLASSO
    - Co-occurrence
 - Lire article multi-niveaux Saint-Clair
 ## A discuter
 - Voir pour TT période du 11 au 14 août
 - Voir pour date CSI car congés avec parents prévus du 29/08 au 12/09.
 ## A faire
 ### Inférence
 - Papier pour comprendre données 
    - ~~Faust et al.~~
    - Abdill et al.
    - Bashan et al.
 - pbs : variance, bcp de zero, covariables, offset et taxonomie (Reseaux arretes differents niveaux : Genre, OTU ...)
 > Combine networks at different taxonomic levels
 - Inférence + GREMLINS
 ### Rédaction article
 - Relire intro St Clair
 - S'inspirer structure pour mon intro
 - Trouver biblio intro
 - Rédiger l'intro
 - Dire résultats nettement meilleurs et variabilités inférieures.
 ## J'ai fait
 ### CSI (en attente contacts PB et SD)
 - Est-ce à moi de contacter Saint-Clair et Sonia/Elisa ? *Pierre et Sophie gèrent*
 - Pierre Gérard a dit oui, il attend les détails
 - Quand : *fin juin début juillet*
 - Liste potentielle :
    - (Saint-Clair)
    - Mahendra
    - Elisa/Sonia
    - Pierre Gérard
 ### Finist'R
 - S'inscrire
 ### ML at Aussois
 - S'inscrire avec abstract court
 - Demander la bourse
 - Détails d'inscriptions : *Je demande une bourse et je m'inscris avec la demande de bourse, Pierre et Sophie font la lettre de recommandation*
 ### Présentation
 - J'ai traduit en anglais ma présentation : [Lien](https://forgemia.inra.fr/louis.lacoste/presentation-colbisbm/-/raw/main/presentation.pdf?ref_type=heads)
 ## A continuer
 ### Applications
 - Idée Sophie: Regarder clustering de données plantes-pollinisateur selon gradient d'urbanisation
 > Sophie a fait une appli qui marche bien et va dans le sens de l'analyse faite
 (à savoir pas d'effet du gradient d'urbanisation). À continuer pour l'intégrer dans l'article !
 ### Axe inférence
 - Lire biblio fournie Julie, Inférence de réseaux : co-occurrence
 > J'ai lu Faust et al. 
 > Je lis Abdill et al.
 ## Repoussés ou abandonnés
 - Résultats simus NA **Erreur pour certaines conditions** : Pour NA robustness générer `nb_rep` collections de taille $M=2$ et prélever 
 $\epsilon_{max}n_r n_c$ liens à retirer puis pour les $\epsilon < \epsilon_{max}$ prélever dans la liste des indices afin d'avoir des perturbations emboitées.
 Il faut que j'ajoute un mécanisme pour reprendre des conditions qui ont planté et que je skip dans le future_lapply les conditions déjà traitées (pour avoir la même seed quand je vais exécuter le code). 
 Implémenté les missing steps.
 > Je n'arrive pas à comprendre les erreurs qui arrivent 
 - Lire Biological Networks -  François Képès
 - Regarder les applications pour les collections de réseaux recommender system *Pas pertinents et trop gros*
 :::{#lst-reco-systems lst-cap="Recommender systems data"}
 Par exemple :
 - [Liste de recommendation data](https://cseweb.ucsd.edu/~jmcauley/datasets.html)
 :::
 ### Papier plus multi-applications
 - Données d'Elisa herbivore ?
 - Données urbanisations ?
 ### Autour de l'article et du package
 - Créer des vignettes illustrant par exemple des cas de simulations. **Possible de mettre l'exemple d'application de Sophie sur les réseaux avec gradient d'urbanisation**.
 ### Simulations article
 - Comparer sur clustering unipartite avec des versions symétrisées par blocs des matrices d'adjacence.
 - Corriger structure de simus :
    - Pour noisy $\alpha$ : 
        - Logit pour envoyer la gaussienne vers (0,1)
        - Beta contrainte dans (0,1)
    - Pour noisy links : Générer `nb_clustering` collections de taille M puis prélever $\epsilon_{max}n_r n_c$ liens à inverser puis pour les $\epsilon < \epsilon_{max}$ prélever dans la liste des indices afin d'avoir des perturbations emboitées. 
--- a/suivi/2025-20/2025-20.qmd
+++ b/suivi/2025-20/2025-20.qmd
@ -0,0 +1,178 @@
 ---
 title: "Bilan semaine 20 2025 : 12 mai - 16 mai"
 categories: [colBiSBM, inférence, GNN]
 date: 2025 05 16
 ---
 ## TOP PRIORITÉ
 - Pour clustering de collections sur données réelles :
    - ~~Relâcher la pénalité pour les coupes pour proposer modèles.~~
    &rarr; L'intuition de Pierre semble être confirmée, les dissimilarités semblent arrêter de varier sensiblement pour de grandes valeurs $(Q_1,Q_2)$.
    - Faire le `hclust` avec diverses distances et voir si les coupes proposées diffèrent sensiblement
    - Si plusieurs clustering possibles les tester et sélectionner le 
    meilleur
    - Ré-ajuster les bonnes partitions.
 - Données simulées tester diverses distances.
 - Dé-bugger les simulations :
    - Inférence : Relancer simus d'inférence avec n = 240 pour voir si la qualité augmente (se rassurer). En fait on est déjà à 240, j'ai relancé avec M = 4 au lieu de M = 2.
    En attente résultats MIGALE -> BUG, dois creuser mais juste des problèmes techniques -> Visiblement il y a d'autres problèmes que juste le plan de parallélisation.
 - Vérifier si problème de version tidyverse pour vapply sur l'**inférence**.
 - Si problème de parallélisation vient de pb de version *future.callr* le signaler à MIGALE. 
 ### Présentations LSD, JdS et ML@Aussois
 - ~~PRÉSENTATION JDS (LSD), durée introuvable, adapter en anglais les slides~~ et voir avec PB et SD.
 - Quel plan ?
 - Quels résultats ? Baldock, Traveset ... (sub-Doré)
 - Pas la peine de préciser l'algo de clustering
 - Indiquer sur une slide le problème de support pour $\pi\rho$ à faire s'il y a 
 le temps.
 - Résultats sur les réseaux Baldock, regarder le positionnement par bloc des 
 espèces communes, regarder les probas d'appartenance aux blocs par espèces 
 communes et par réseau.
 ### Applications
 - Kmeans sur la densité des réseaux subdoré pour pré-partitionner et *clusteriser*.
 Car densités déséquilibrées.
 :::{#ref-kmeans-vae}
 - Faire GNN-VAE Doré et sub-Doré avec kmeans et clustering sur l'espace latent
 J'ai commencé à regarder un peu
 :::
 ### Inférence et microbes
 - Lancer *colBiSBM* sur $OTU\times Sample$
 - Se renseigner techniques d'inférence de réseaux : 
    - covariance (base corrélation et seuil)
    - GraphicalLASSO
    - Co-occurrence
 - Lancer *colSBM* sur $OTU\times OTU$
 - Creuser [TabNet](https://raw.githubusercontent.com/cregouby/R-toulouse-tabnet/main/Tabnet_RR2023_fr_pdf.pdf) de Christophe Regouby et les [exercices](https://github.com/cregouby/Tutoriel_torch)
 - Regarder **SPARTA** Rennes
 - Lire Papiers compositional data (Aitchison et al. intro)
 - Lire article multi-niveaux Saint-Clair
 ## A discuter
 ## A faire
 ### Inférence
 - Papier pour comprendre données 
    - ~~Faust et al.~~
    - Abdill et al.
    - Bashan et al.
 - pbs : variance, bcp de zero, covariables, offset et taxonomie (Reseaux arretes differents niveaux : Genre, OTU ...)
 > Combine networks at different taxonomic levels
 - Inférence + GREMLINS
 ### Rédaction article
 - Relire intro St Clair
 - S'inspirer structure pour mon intro
 - Trouver biblio intro
 - Rédiger l'intro
 - Dire résultats nettement meilleurs et variabilités inférieures.
 ## J'ai fait
 - Dé-bugger les simulations :
    - Clustering : Relancer simulations de clustering avec $M = 30$ où $M_i = 10, \forall i$. En attente retour MIGALE
    Relancer simus clustering avec VEM steps = 10 000 et plus nombreux init pour spectral. Ajouter simu clustering métriques nb sous-collections obtenues. 
    Vérifier les résultats obtenus si ARI = 0. Et augmenter la taille $M = 30$ avec $M_1 = M_2 = M_3 = 10$. ~~-> BUG, dois creuser mais juste des problèmes techniques.~~
    Le bug venait probablement d'une inadéquation entre la version de *future* et *future.callr*, les résultats temporaires sont encourageants.
    **J'ai mis les résultats dans l'article**.
 ### Présentations LSD, JdS et ML@Aussois
 - ~~PRÉSENTATION JDS (LSD), durée introuvable, adapter en anglais les slides~~ et voir avec PB et SD.
 - Quel plan ?
 - Quels résultats ? Baldock, Traveset ... (sub-Doré)
 - Mettre les détails des formules et des algos pour VE et sélection de modèle en 
 annexe.
 - Préciser simplement que l'on utilise un algo VE et un critère type BIC.
 ### VGAE
 - ~~Dé-bugger pourquoi `BipartiteInnerProductDecoder.forward() -> NaN`~~ -> 
 **C'était parce que les features en entrée n'étaient pas normalisées par les 
 couches de convolutions**. Les meilleurs résultats d'AUC et de précisions que 
 j'obtiens par VGAE sont autour de 0.80.
 ### Inférence et microbes
 - Human Gut Compendium : données téléchargées et préparées. Mises au format 
 `edgelist` et liste de matrices et extrait les infos supplémentaires.
 &rarr; trop lourd en RAM pour tourner sur machine perso (optim colSBM...)
 ## A continuer
 ### Applications
 - Idée Sophie: Regarder clustering de données plantes-pollinisateur selon gradient d'urbanisation
 > Sophie a fait une appli qui marche bien et va dans le sens de l'analyse faite
 (à savoir pas d'effet du gradient d'urbanisation). À continuer pour l'intégrer dans l'article !
 ### Axe inférence
 - Lire biblio fournie Julie, Inférence de réseaux : co-occurrence
 > J'ai lu Faust et al. 
 > Je lis Abdill et al.
 ## Repoussés ou abandonnés
 :::{.callout-note collapse="true"}
 ## Déplier pour voir
 - Résultats simus NA **Erreur pour certaines conditions** : Pour NA robustness générer `nb_rep` collections de taille $M=2$ et prélever 
 $\epsilon_{max}n_r n_c$ liens à retirer puis pour les $\epsilon < \epsilon_{max}$ prélever dans la liste des indices afin d'avoir des perturbations emboitées.
 Il faut que j'ajoute un mécanisme pour reprendre des conditions qui ont planté et que je skip dans le future_lapply les conditions déjà traitées (pour avoir la même seed quand je vais exécuter le code). 
 Implémenté les missing steps.
 > Je n'arrive pas à comprendre les erreurs qui arrivent 
 - Lire Biological Networks -  François Képès
 - Regarder les applications pour les collections de réseaux recommender system *Pas pertinents et trop gros*
 :::{#lst-reco-systems lst-cap="Recommender systems data"}
 Par exemple :
 - [Liste de recommendation data](https://cseweb.ucsd.edu/~jmcauley/datasets.html)
 :::
 ### Papier plus multi-applications
 - Données d'Elisa herbivore ?
 - Données urbanisations ?
 ### Autour de l'article et du package
 - Créer des vignettes illustrant par exemple des cas de simulations. **Possible de mettre l'exemple d'application de Sophie sur les réseaux avec gradient d'urbanisation**.
 ### Simulations article
 - Comparer sur clustering unipartite avec des versions symétrisées par blocs des matrices d'adjacence.
 - Corriger structure de simus :
    - Pour noisy $\alpha$ : 
        - Logit pour envoyer la gaussienne vers (0,1)
        - Beta contrainte dans (0,1)
    - Pour noisy links : Générer `nb_clustering` collections de taille M puis prélever $\epsilon_{max}n_r n_c$ liens à inverser puis pour les $\epsilon < \epsilon_{max}$ prélever dans la liste des indices afin d'avoir des perturbations emboitées. 
 :::
--- a/suivi/2025-21/2025-21.qmd
+++ b/suivi/2025-21/2025-21.qmd
@ -0,0 +1,154 @@
 ---
 title: "Bilan semaine 21 2025 : 19 mai - 23 mai"
 categories: [colBiSBM, inférence, GNN]
 date: 2025 05 23
 ---
 ## TOP PRIORITÉ
 - ✅ Corriger pour les simus dans l'article : écrire $N = \#\text{ de répétitions}$ 
 - Pour clustering de collections sur données réelles :    
    &rarr; L'intuition de Pierre semble être confirmée, les dissimilarités semblent arrêter de varier sensiblement pour de grandes valeurs $(Q_1,Q_2)$.
    - Faire le `hclust` avec diverses distances et voir si les coupes proposées diffèrent sensiblement
    - Si plusieurs clustering possibles les tester et sélectionner le 
    meilleur
    - Ré-ajuster les bonnes partitions.
 - Idée de Sophie : alterner descendant et ascendant &rarr; prometteur aussi
 - Pour les deux propositions données simulées tester diverses distances.
 - Dé-bugger les simulations :
    - Inférence : Relancer simus d'inférence avec n = 240 pour voir si la qualité augmente (se rassurer). En fait on est déjà à 240, j'ai relancé avec M = 4 au lieu de M = 2.
    En attente résultats MIGALE -> BUG, dois creuser mais juste des problèmes techniques -> Visiblement il y a d'autres problèmes que juste le plan de parallélisation.
 - Vérifier si problème de version tidyverse pour vapply sur l'**inférence**.
 - Si problème de parallélisation vient de pb de version *future.callr* le signaler à MIGALE. 
 ### Présentations LSD, JdS et ML@Aussois
 - ✅ A l'oral pourquoi des réseaux : car de plus en plus disponibles et idée derrière, la structure fonctionnelle permet de comprendre les caractéristiques de l'écosystème décrit
 - ✅ Chercher des réfs pour les méthodes (Hoff Latent Position Model, Nowicki pour LBM, une review pour les métriques voir thèses St Clair et Emré)
 - ✅ Sur slide méthodes différencier métriques micro et macro et dire oralement que nous méso
 - ❎ Indiquer sur une slide le problème de support pour $\pi\rho$ à faire s'il y a 
 le temps.
 - ✅ Résultats sur les réseaux Baldock, regarder le positionnement par bloc des 
 espèces communes, regarder les probas d'appartenance aux blocs par espèces 
 communes et par réseau ➡️ Bourdons
 - ✅ Intégrer les retours de Sophie
 - Attente retours Pierre
 ### Applications
 - Kmeans sur la densité des réseaux subdoré pour pré-partitionner et *clusteriser*.
 Car densités déséquilibrées.
 :::{#ref-kmeans-vae}
 - Faire GNN-VAE Doré et sub-Doré avec kmeans et clustering sur l'espace latent
 J'ai commencé à regarder un peu
 :::
 - Comparer les perfs du VAE sur Baldock avec colBiSBM par exemple
 ### Inférence et microbes
 - Lancer *colBiSBM* sur $OTU\times Sample$ &rarr; problème du chargement en mémoire des données à voir
 - Se renseigner techniques d'inférence de réseaux : 
    - covariance (base corrélation et seuil)
    - GraphicalLASSO
    - Co-occurrence
 - Lancer *colSBM* sur $OTU\times OTU$
 - Creuser [TabNet](https://raw.githubusercontent.com/cregouby/R-toulouse-tabnet/main/Tabnet_RR2023_fr_pdf.pdf) de Christophe Regouby et les [exercices](https://github.com/cregouby/Tutoriel_torch)
 - Regarder **SPARTA** Rennes
 - Lire Papiers compositional data (Aitchison et al. intro)
 - Lire article multi-niveaux Saint-Clair
 - Demander à JA si elle connaît des réseaux d'interactions connus par les experts (idée d'intégrer une connaissance experte et de voir les différences de structure par rapport à celle attendue)
 - Ecrire et étudier les modèles pour différents niveaux taxonomiques.
 \begin{align*}
 i \rightarrow &~N^1_i \subseteq N^2_i \subseteq N^3_i & \text{Taxonomie}\\
 Z^0_i \overset{?}{=} & Z^1_i \overset{?}{=}  Z^2_i \overset{?}{=} Z^3_i & \text{Groupes fonctionnels}
 \end{align*}
 ## A discuter
 ## A faire
 ### Inférence
 - Papier pour comprendre données 
    - ~~Faust et al.~~
    - Abdill et al.
    - Bashan et al.
 - pbs : variance, bcp de zero, covariables, offset et taxonomie (Reseaux arretes differents niveaux : Genre, OTU ...)
 > Combine networks at different taxonomic levels
 - Inférence + GREMLINS
 ### Rédaction article
 - Relire intro St Clair
 - S'inspirer structure pour mon intro
 - Trouver biblio intro
 - Rédiger l'intro
 - Dire résultats nettement meilleurs et variabilités inférieures.
 ## A continuer
 ### Applications
 - Idée Sophie: Regarder clustering de données plantes-pollinisateur selon gradient d'urbanisation
 > Sophie a fait une appli qui marche bien et va dans le sens de l'analyse faite
 (à savoir pas d'effet du gradient d'urbanisation). À continuer pour l'intégrer dans l'article !
 ### Axe inférence
 - Lire biblio fournie Julie, Inférence de réseaux : co-occurrence
 > J'ai lu Faust et al. 
 > Je lis Abdill et al.
 ## Repoussés ou abandonnés
 :::{.callout-note collapse="true"}
 ## Déplier pour voir
 - Résultats simus NA **Erreur pour certaines conditions** : Pour NA robustness générer `nb_rep` collections de taille $M=2$ et prélever 
 $\epsilon_{max}n_r n_c$ liens à retirer puis pour les $\epsilon < \epsilon_{max}$ prélever dans la liste des indices afin d'avoir des perturbations emboitées.
 Il faut que j'ajoute un mécanisme pour reprendre des conditions qui ont planté et que je skip dans le future_lapply les conditions déjà traitées (pour avoir la même seed quand je vais exécuter le code). 
 Implémenté les missing steps.
 > Je n'arrive pas à comprendre les erreurs qui arrivent 
 - Lire Biological Networks -  François Képès
 - Regarder les applications pour les collections de réseaux recommender system *Pas pertinents et trop gros*
 :::{#lst-reco-systems lst-cap="Recommender systems data"}
 Par exemple :
 - [Liste de recommendation data](https://cseweb.ucsd.edu/~jmcauley/datasets.html)
 :::
 ### Papier plus multi-applications
 - Données d'Elisa herbivore ?
 - Données urbanisations ?
 ### Autour de l'article et du package
 - Créer des vignettes illustrant par exemple des cas de simulations. **Possible de mettre l'exemple d'application de Sophie sur les réseaux avec gradient d'urbanisation**.
 ### Simulations article
 - Comparer sur clustering unipartite avec des versions symétrisées par blocs des matrices d'adjacence.
 - Corriger structure de simus :
    - Pour noisy $\alpha$ : 
        - Logit pour envoyer la gaussienne vers (0,1)
        - Beta contrainte dans (0,1)
    - Pour noisy links : Générer `nb_clustering` collections de taille M puis prélever $\epsilon_{max}n_r n_c$ liens à inverser puis pour les $\epsilon < \epsilon_{max}$ prélever dans la liste des indices afin d'avoir des perturbations emboitées. 
 :::
--- a/suivi/2025-22/2025-22.qmd
+++ b/suivi/2025-22/2025-22.qmd
@ -0,0 +1,146 @@
 ---
 title: "Bilan semaine 22 2025 : 26 mai - 30 mai"
 categories: [colBiSBM, inférence, GNN]
 date: 2025 05 28
 ---
 ## TOP PRIORITÉ
 - Pour clustering de collections sur données réelles :    
    &rarr; L'intuition de Pierre semble être confirmée, les dissimilarités semblent arrêter de varier sensiblement pour de grandes valeurs $(Q_1,Q_2)$.
    - Faire le `hclust` avec diverses distances et voir si les coupes proposées diffèrent sensiblement
    - Si plusieurs clustering possibles les tester et sélectionner le 
    meilleur
    - Ré-ajuster les bonnes partitions.
 - Idée de Sophie : alterner descendant et ascendant &rarr; prometteur aussi
 - Pour les deux propositions données simulées tester diverses distances.
 - Dé-bugger les simulations :
    - Inférence : Relancer simus d'inférence avec n = 240 pour voir si la qualité augmente (se rassurer). En fait on est déjà à 240, j'ai relancé avec M = 4 au lieu de M = 2.
    En attente résultats MIGALE -> BUG, dois creuser mais juste des problèmes techniques -> Visiblement il y a d'autres problèmes que juste le plan de parallélisation.
 - Vérifier si problème de version tidyverse pour vapply sur l'**inférence**.
 - Si problème de parallélisation vient de pb de version *future.callr* le signaler à MIGALE. 
 ### Présentations LSD, JdS et ML@Aussois
 - Attente retours Pierre
 ### Applications
 - Kmeans sur la densité des réseaux subdoré pour pré-partitionner et *clusteriser*.
 Car densités déséquilibrées.
 :::{#ref-kmeans-vae}
 - Faire GNN-VAE Doré et sub-Doré avec kmeans et clustering sur l'espace latent
 J'ai commencé à regarder un peu
 :::
 - Comparer les perfs du VAE sur Baldock avec colBiSBM par exemple
 ### Inférence et microbes
 - Lancer *colBiSBM* sur $OTU\times Sample$ &rarr; problème du chargement en mémoire des données à voir
 - Se renseigner techniques d'inférence de réseaux : 
    - covariance (base corrélation et seuil)
    - GraphicalLASSO
    - Co-occurrence
 - Lancer *colSBM* sur $OTU\times OTU$
 - Creuser [TabNet](https://raw.githubusercontent.com/cregouby/R-toulouse-tabnet/main/Tabnet_RR2023_fr_pdf.pdf) de Christophe Regouby et les [exercices](https://github.com/cregouby/Tutoriel_torch)
 - Regarder **SPARTA** Rennes
 - Lire Papiers compositional data (Aitchison et al. intro)
 - Lire article multi-niveaux Saint-Clair
 - Demander à JA si elle connaît des réseaux d'interactions connus par les experts (idée d'intégrer une connaissance experte et de voir les différences de structure par rapport à celle attendue)
 - Ecrire et étudier les modèles pour différents niveaux taxonomiques.
 \begin{align*}
 i \rightarrow &~N^1_i \subseteq N^2_i \subseteq N^3_i & \text{Taxonomie}\\
 Z^0_i \overset{?}{=} & Z^1_i \overset{?}{=}  Z^2_i \overset{?}{=} Z^3_i & \text{Groupes fonctionnels}
 \end{align*}
 ## A discuter
 ## A faire
 ### Inférence
 - Papier pour comprendre données 
    - ~~Faust et al.~~
    - Abdill et al.
    - Bashan et al.
 - pbs : variance, bcp de zero, covariables, offset et taxonomie (Reseaux arretes differents niveaux : Genre, OTU ...)
 > Combine networks at different taxonomic levels
 - Inférence + GREMLINS
 ### Rédaction article
 - Relire intro St Clair
 - S'inspirer structure pour mon intro
 - Trouver biblio intro
 - Rédiger l'intro
 - Dire résultats nettement meilleurs et variabilités inférieures.
 ## A continuer
 ### Applications
 - Idée Sophie: Regarder clustering de données plantes-pollinisateur selon gradient d'urbanisation
 > Sophie a fait une appli qui marche bien et va dans le sens de l'analyse faite
 (à savoir pas d'effet du gradient d'urbanisation). À continuer pour l'intégrer dans l'article !
 ### Axe inférence
 - Lire biblio fournie Julie, Inférence de réseaux : co-occurrence
 > J'ai lu Faust et al. 
 > Je lis Abdill et al.
 ## Repoussés ou abandonnés
 :::{.callout-note collapse="true"}
 ## Déplier pour voir
 - Résultats simus NA **Erreur pour certaines conditions** : Pour NA robustness générer `nb_rep` collections de taille $M=2$ et prélever 
 $\epsilon_{max}n_r n_c$ liens à retirer puis pour les $\epsilon < \epsilon_{max}$ prélever dans la liste des indices afin d'avoir des perturbations emboitées.
 Il faut que j'ajoute un mécanisme pour reprendre des conditions qui ont planté et que je skip dans le future_lapply les conditions déjà traitées (pour avoir la même seed quand je vais exécuter le code). 
 Implémenté les missing steps.
 > Je n'arrive pas à comprendre les erreurs qui arrivent 
 - Lire Biological Networks -  François Képès
 - Regarder les applications pour les collections de réseaux recommender system *Pas pertinents et trop gros*
 :::{#lst-reco-systems lst-cap="Recommender systems data"}
 Par exemple :
 - [Liste de recommendation data](https://cseweb.ucsd.edu/~jmcauley/datasets.html)
 :::
 ### Papier plus multi-applications
 - Données d'Elisa herbivore ?
 - Données urbanisations ?
 ### Autour de l'article et du package
 - Créer des vignettes illustrant par exemple des cas de simulations. **Possible de mettre l'exemple d'application de Sophie sur les réseaux avec gradient d'urbanisation**.
 ### Simulations article
 - Comparer sur clustering unipartite avec des versions symétrisées par blocs des matrices d'adjacence.
 - Corriger structure de simus :
    - Pour noisy $\alpha$ : 
        - Logit pour envoyer la gaussienne vers (0,1)
        - Beta contrainte dans (0,1)
    - Pour noisy links : Générer `nb_clustering` collections de taille M puis prélever $\epsilon_{max}n_r n_c$ liens à inverser puis pour les $\epsilon < \epsilon_{max}$ prélever dans la liste des indices afin d'avoir des perturbations emboitées. 
 :::
--- a/suivi/2025-24/2025-24.qmd
+++ b/suivi/2025-24/2025-24.qmd
@ -0,0 +1,164 @@
 ---
 title: "Bilan semaine 24 2025 : 10 juin - 13 juin"
 categories: [colBiSBM, inférence, GNN]
 date: 2025 06 13
 bibliography: references.bib
 ---
 ## TODO List
 - ✅ Préparer la séance intro à Git pour le 13 juin. **La séance s'est très bien passée**
 - Pour clustering de collections sur données réelles :    
    &rarr; L'intuition de Pierre semble être confirmée, les dissimilarités semblent arrêter de varier sensiblement pour de grandes valeurs $(Q_1,Q_2)$.
    - Faire le `hclust` avec diverses distances et voir si les coupes proposées diffèrent sensiblement
    - Si plusieurs clustering possibles les tester et sélectionner le 
    meilleur
    - Ré-ajuster les bonnes partitions.
    - ✅ C'est bon j'ai une fonction qui tourne, mais lentement ⌛
    - ⏳Simulations en train de tourner
    - ❗L'approche que j'ai en mettant la pénalité à 0 peut favoriser de séparer trop les réseaux et donc il faudrait refusionner.
    ➡️ mais le d&a ne fonctionne qu'en *iid*
 - ✅ Idée de Sophie : alterner descendant et ascendant &rarr; prometteur aussi. J'ai codé le fichier de simulations et débuggué le vecteur de clustering ▶️ à voir les performances. ➡️ la simu à 9 réseaux (bcp de variabilité a priori) est lancée, en attente des résultats ➡️ Je tombe sur un bug déjà rencontré dans les simus d'inférence. J'ai lancé sans parallélisation pour essayer de comprendre le bug.
 ✅ Il y avait un bug dans la fenêtre glissante où la condition d'arrêt quand le BICL n'augmentait plus était mal détectée. Corrigé.
 ![9 réseaux - ARI pour le clustering avec modèles iid, procédure descendante et descendante&ascendante](figs/ari-clustering-desc&asc9.png)
 :::{layout-ncol="2"}
 ![30 réseaux - ARI pour le clustering avec modèles iid, procédure descendante et descendante&ascendante](figs/ari-clustering-desc&asc30.png)
 ![30 réseaux - # collections à la fin du clustering. La vraie valeur 3 est en rouge pointillés](figs/nbcollections-clustering-descending.png)
 :::
 - Pour les deux propositions données simulées tester diverses distances.
 - Dé-bugger les simulations :
    - Inférence : Relancer simus d'inférence avec n = 240 pour voir si la qualité augmente (se rassurer). En fait on est déjà à 240, j'ai relancé avec M = 4 au lieu de M = 2.
    En attente résultats MIGALE -> BUG, dois creuser mais juste des problèmes techniques -> Visiblement il y a d'autres problèmes que juste le plan de parallélisation.
 - Vérifier si problème de version tidyverse pour vapply sur l'**inférence**.
 - Si problème de parallélisation vient de pb de version *future.callr* le signaler à MIGALE. 
 - ✅ Réparé mauvais placement des légendes, des valeurs etc.
 ### Applications
 - Kmeans sur la densité des réseaux subdoré pour pré-partitionner et *clusteriser*.
 Car densités déséquilibrées.
 :::{#ref-kmeans-vae}
 - Faire GNN-VAE Doré et sub-Doré avec kmeans et clustering sur l'espace latent
 J'ai commencé à regarder un peu
 :::
 - Comparer les perfs du VAE sur Baldock avec colBiSBM par exemple
 ### Inférence et microbes
 - Lancer *colBiSBM* sur $OTU\times Sample$ &rarr; problème du chargement en mémoire des données à voir
 - Se renseigner techniques d'inférence de réseaux : 
    - covariance (base corrélation et seuil)
    - GraphicalLASSO
    - Co-occurrence
 - Lancer *colSBM* sur $OTU\times OTU$
 - Creuser [TabNet](https://raw.githubusercontent.com/cregouby/R-toulouse-tabnet/main/Tabnet_RR2023_fr_pdf.pdf) de Christophe Regouby et les [exercices](https://github.com/cregouby/Tutoriel_torch)
 - Regarder **SPARTA** Rennes
 - Lire Papiers compositional data (Aitchison et al. intro)
 - Lire article multi-niveaux Saint-Clair
 - Demander à JA si elle connaît des réseaux d'interactions connus par les experts (idée d'intégrer une connaissance experte et de voir les différences de structure par rapport à celle attendue)
 - Ecrire et étudier les modèles pour différents niveaux taxonomiques.
 \begin{align*}
 i \rightarrow &~N^1_i \subseteq N^2_i \subseteq N^3_i & \text{Taxonomie}\\
 Z^0_i \overset{?}{=} & Z^1_i \overset{?}{=}  Z^2_i \overset{?}{=} Z^3_i & \text{Groupes fonctionnels}
 \end{align*}
 ## Lecture en cours
 ### OT
 - @mazeletUnsupervisedLearningOptimal Intéressant pour le transport optimal entre graphes de tailles différentes
    - @nennaLecture2Entropic Pour comprendre le problème d'OT régularisé pour l'entropie.
 ## A discuter
 ### Inférence
 - Papier pour comprendre données 
    - ~~Faust et al.~~
    - Abdill et al.
    - Bashan et al.
 - pbs : variance, bcp de zero, covariables, offset et taxonomie (Reseaux arretes differents niveaux : Genre, OTU ...)
 > Combine networks at different taxonomic levels
 - Inférence + GREMLINS
 ### Rédaction article
 - Relire intro St Clair
 - S'inspirer structure pour mon intro
 - Trouver biblio intro
 - Rédiger l'intro
 - Dire résultats nettement meilleurs et variabilités inférieures.
 ## A continuer
 ### Applications
 - Idée Sophie: Regarder clustering de données plantes-pollinisateur selon gradient d'urbanisation
 > Sophie a fait une appli qui marche bien et va dans le sens de l'analyse faite
 (à savoir pas d'effet du gradient d'urbanisation). À continuer pour l'intégrer dans l'article !
 ### Axe inférence
 - Lire biblio fournie Julie, Inférence de réseaux : co-occurrence
 > J'ai lu Faust et al. 
 > Je lis Abdill et al.
 ## Repoussés ou abandonnés
 :::{.callout-note collapse="true"}
 ## Déplier pour voir
 - Résultats simus NA **Erreur pour certaines conditions** : Pour NA robustness générer `nb_rep` collections de taille $M=2$ et prélever 
 $\epsilon_{max}n_r n_c$ liens à retirer puis pour les $\epsilon < \epsilon_{max}$ prélever dans la liste des indices afin d'avoir des perturbations emboitées.
 Il faut que j'ajoute un mécanisme pour reprendre des conditions qui ont planté et que je skip dans le future_lapply les conditions déjà traitées (pour avoir la même seed quand je vais exécuter le code). 
 Implémenté les missing steps.
 > Je n'arrive pas à comprendre les erreurs qui arrivent 
 - Lire Biological Networks -  François Képès
 - Regarder les applications pour les collections de réseaux recommender system *Pas pertinents et trop gros*
 :::{#lst-reco-systems lst-cap="Recommender systems data"}
 Par exemple :
 - [Liste de recommendation data](https://cseweb.ucsd.edu/~jmcauley/datasets.html)
 :::
 ### Papier plus multi-applications
 - Données d'Elisa herbivore ?
 - Données urbanisations ?
 ### Autour de l'article et du package
 - Créer des vignettes illustrant par exemple des cas de simulations. **Possible de mettre l'exemple d'application de Sophie sur les réseaux avec gradient d'urbanisation**.
 ### Simulations article
 - Comparer sur clustering unipartite avec des versions symétrisées par blocs des matrices d'adjacence.
 - Corriger structure de simus :
    - Pour noisy $\alpha$ : 
        - Logit pour envoyer la gaussienne vers (0,1)
        - Beta contrainte dans (0,1)
    - Pour noisy links : Générer `nb_clustering` collections de taille M puis prélever $\epsilon_{max}n_r n_c$ liens à inverser puis pour les $\epsilon < \epsilon_{max}$ prélever dans la liste des indices afin d'avoir des perturbations emboitées. 
 :::
--- a/suivi/2025-24/figs/ari-clustering-desc&asc30.png
+++ b/suivi/2025-24/figs/ari-clustering-desc&asc30.png
--- a/suivi/2025-24/figs/ari-clustering-desc&asc9.png
+++ b/suivi/2025-24/figs/ari-clustering-desc&asc9.png
--- a/suivi/2025-24/figs/nbcollections-clustering-descending.png
+++ b/suivi/2025-24/figs/nbcollections-clustering-descending.png
--- a/suivi/2025-24/references.bib
+++ b/suivi/2025-24/references.bib
@ -0,0 +1,20 @@
@article{mazeletUnsupervisedLearningOptimal,
  title      = {Unsupervised {{Learning}} for {{Optimal Transport}} Plan Prediction between Unbalanced Graphs},
  author     = {Mazelet, Sonia and Flamary, Rémi and Thirion, Bertrand},
  abstract   = {Optimal transport between graphs, based on Gromov-Wasserstein and other extensions, is a powerful tool for comparing and aligning graph structures. However, solving the associated non-convex optimization problems is computationally expensive, which limits the scalability of these methods to large graphs. In this work, we present Unbalanced Learning of Optimal Transport (ULOT), a deep learning method that predicts optimal transport plans between two graphs. Our method is trained by minimizing the fused unbalanced Gromov-Wasserstein (FUGW) loss. We propose a novel neural architecture with cross-attention that is conditioned on the FUGW tradeoff hyperparameters. We evaluate ULOT on synthetic stochastic block model (SBM) graphs and on real cortical surface data obtained from fMRI. ULOT predicts transport plans with competitive loss up to two orders of magnitude faster than classical solvers. Furthermore, the predicted plan can be used as a warm start for classical solvers to accelerate their convergence. Finally, the predicted transport plan is fully differentiable with respect to the graph inputs and FUGW hyperparameters, enabling the optimization of functionals of the ULOT plan.},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-11T09:08:09.864Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/HPZEYMM9/Mazelet et al. - Unsupervised Learning for Optimal Transport plan prediction between unbalanced graphs.pdf}
 }
@article{nennaLecture2Entropic,
  title      = {Lecture 2: {{Entropic Optimal Transport}}},
  author     = {Nenna, Luca},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-11T16:06:28.547Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/WGFIISDB/Nenna - Lecture 2 Entropic Optimal Transport.pdf}
 }
--- a/suivi/2025-25/2025-25.qmd
+++ b/suivi/2025-25/2025-25.qmd
@ -0,0 +1,124 @@
 ---
 title: "Bilan semaine 25 2025 : 16 juin - 20 juin"
 categories: [colBiSBM, inférence, GNN]
 date: 2025 06 20
 bibliography: references.bib
 ---
 ## TODO List
 - Pour clustering de collections sur données réelles :    
    &rarr; L'intuition de Pierre semble être confirmée, les dissimilarités semblent arrêter de varier sensiblement pour de grandes valeurs $(Q_1,Q_2)$.
    - ✅ Si plusieurs clustering possibles les tester et sélectionner le 
    meilleur
    - ✅ Ré-ajuster les bonnes partitions.
    - ❓Je n'arrive plus à reproduire le bug pour l'inférence...
 - ✅ **Oui c'est bien le cas** Clustering descendant & ascendant : vérifier qu'au cours du temps le $BICL_{asc} \geq BICL_{desc}$
 - Creuser et explorer avec easy16s !
 - ✅ Comparer les perfs du VAE sur Baldock avec colBiSBM par exemple
 <table>
 <caption>AUC values for colBiSBM and VGAE models across cities</caption>
 <thead>
 <tr>
 <th style="empty-cells: hide;border-bottom:hidden;" colspan="1"></th>
 <th style="border-bottom:hidden;padding-bottom:0; padding-left:3px;padding-right:3px;text-align: center; " colspan="2"><div style="border-bottom: 1px solid #ddd; padding-bottom: 5px; ">AUC</div></th>
 </tr>
  <tr>
   <th style="text-align:left;"> City </th>
   <th style="text-align:right;"> colBiSBM </th>
   <th style="text-align:right;"> Untuned VGAE </th>
  </tr>
 </thead>
 <tbody>
  <tr>
   <td style="text-align:left;"> Bristol </td>
   <td style="text-align:right;"> 0.798 </td>
   <td style="text-align:right;"> 0.755 </td>
  </tr>
  <tr>
   <td style="text-align:left;"> Edinburgh </td>
   <td style="text-align:right;"> 0.836 </td>
   <td style="text-align:right;"> 0.774 </td>
  </tr>
  <tr>
   <td style="text-align:left;"> Leeds </td>
   <td style="text-align:right;"> 0.854 </td>
   <td style="text-align:right;"> 0.760 </td>
  </tr>
  <tr>
   <td style="text-align:left;"> Reading </td>
   <td style="text-align:right;"> 0.867 </td>
   <td style="text-align:right;"> 0.740 </td>
  </tr>
 </tbody>
 </table>
 - Dé-bugger les simulations :
    - ⌛ Inférence : Relancer simus d'inférence avec n = 240 pour voir si la qualité augmente (se rassurer). En fait on est déjà à 240, j'ai relancé avec M = 4 au lieu de M = 2.
    En attente résultats MIGALE -> BUG, dois creuser mais juste des problèmes techniques -> Visiblement il y a d'autres problèmes que juste le plan de parallélisation.
        - ✅ **Non ça n'a pas l'air d'être ça**. Vérifier si problème de version tidyverse pour vapply sur l'**inférence**.
        - ⌛Bon le bug ne se reproduit plus... les jobs sont juste trop longs (> 120h) j'ai relancé, il ne reste que 182/972 conditions.
 - ✅ **Il suffisait de faire la màj soi-même...** Si problème de parallélisation vient de pb de version *future.callr* le signaler à MIGALE. 
 - Kmeans sur la densité des réseaux subdoré pour pré-partitionner et *clusteriser*.
 Car densités déséquilibrées.
 :::{#ref-kmeans-vae}
 - Faire GNN-VAE Doré et sub-Doré avec kmeans et clustering sur l'espace latent
 J'ai commencé à regarder un peu
 :::
 ### Inférence et microbes
 - Lancer *colBiSBM* sur $OTU\times Sample$ &rarr; problème du chargement en mémoire des données à voir
 - Lancer *colSBM* sur $OTU\times OTU$
 - Creuser [TabNet](https://raw.githubusercontent.com/cregouby/R-toulouse-tabnet/main/Tabnet_RR2023_fr_pdf.pdf) de Christophe Regouby et les [exercices](https://github.com/cregouby/Tutoriel_torch)
 - Regarder **SPARTA** Rennes
 - Lire Papiers compositional data (Aitchison et al. intro)
 - Lire article multi-niveaux Saint-Clair
 - Demander à JA si elle connaît des réseaux d'interactions connus par les experts (idée d'intégrer une connaissance experte et de voir les différences de structure par rapport à celle attendue)
 - Ecrire et étudier les modèles pour différents niveaux taxonomiques.
 \begin{align*}
 i \rightarrow &~N^1_i \subseteq N^2_i \subseteq N^3_i & \text{Taxonomie}\\
 Z^0_i \overset{?}{=} & Z^1_i \overset{?}{=}  Z^2_i \overset{?}{=} Z^3_i & \text{Groupes fonctionnels}
 \end{align*}
 ## Lecture en cours
 ### OT
 - ⌛ @mazeletUnsupervisedLearningOptimal Intéressant pour le transport optimal entre graphes de tailles différentes
 - ⌛ @nennaLecture2Entropic Pour comprendre le problème d'OT régularisé pour l'entropie.
 - ⌛ @nennaLecture1Monge
 ### Inférence de graphes
 - ✅ @matchadoNetworkAnalysisMethods2021b ➡️ Nos données étant compositionnelles
 il faut utiliser:
    - CCLasso et SparCC
    - HARMONIES pour zéro inflation (Binomiale négative), COZINE centered log ratio transformation compositionnalité, zéro inflation et forte précision
    - MixMPLN pour générer K réseaux issus de K Poisson log Normal
    - mLDM peut enlever les arêtes indirectes.
 - NetCoMi agrège plusieurs méthodes tout en permettant l'analyse différentielle !
 Si pas compositionnelles :
 - Meta-Network pour arêtes indirectes et non linéaires
 - Environmentally-Driven Edge detection pour corriger les effets de l'environnement
 ## A discuter
 ### Inférence
 - pbs : variance, bcp de zero, covariables, offset et taxonomie (Reseaux arretes differents niveaux : Genre, OTU ...)
 > Combine networks at different taxonomic levels
 - Inférence + GREMLINS
--- a/suivi/2025-25/references.bib
+++ b/suivi/2025-25/references.bib
@ -0,0 +1,49 @@
@article{mazeletUnsupervisedLearningOptimal,
  title      = {Unsupervised {{Learning}} for {{Optimal Transport}} Plan Prediction between Unbalanced Graphs},
  author     = {Mazelet, Sonia and Flamary, Rémi and Thirion, Bertrand},
  abstract   = {Optimal transport between graphs, based on Gromov-Wasserstein and other extensions, is a powerful tool for comparing and aligning graph structures. However, solving the associated non-convex optimization problems is computationally expensive, which limits the scalability of these methods to large graphs. In this work, we present Unbalanced Learning of Optimal Transport (ULOT), a deep learning method that predicts optimal transport plans between two graphs. Our method is trained by minimizing the fused unbalanced Gromov-Wasserstein (FUGW) loss. We propose a novel neural architecture with cross-attention that is conditioned on the FUGW tradeoff hyperparameters. We evaluate ULOT on synthetic stochastic block model (SBM) graphs and on real cortical surface data obtained from fMRI. ULOT predicts transport plans with competitive loss up to two orders of magnitude faster than classical solvers. Furthermore, the predicted plan can be used as a warm start for classical solvers to accelerate their convergence. Finally, the predicted transport plan is fully differentiable with respect to the graph inputs and FUGW hyperparameters, enabling the optimization of functionals of the ULOT plan.},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-11T09:08:09.864Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/HPZEYMM9/Mazelet et al. - Unsupervised Learning for Optimal Transport plan prediction between unbalanced graphs.pdf}
 }
@article{nennaLecture2Entropic,
  title      = {Lecture 2: {{Entropic Optimal Transport}}},
  author     = {Nenna, Luca},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-11T16:06:28.547Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/WGFIISDB/Nenna - Lecture 2 Entropic Optimal Transport.pdf}
 }
@article{nennaLecture1Monge,
  title      = {Lecture 1 {{Monge}} and {{Kantorovich}} Problems: From Primal to Dual},
  author     = {Nenna, Luca},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-13T09:24:13.832Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/7LVQPD6D/Nenna - Lecture 1 Monge and Kantorovich problems from primal to dual.pdf}
 }
@article{matchadoNetworkAnalysisMethods2021b,
  title      = {Network Analysis Methods for Studying Microbial Communities: {{A}} Mini Review},
  shorttitle = {Network Analysis Methods for Studying Microbial Communities},
  author     = {Matchado, Monica Steffi and Lauber, Michael and Reitmeier, Sandra and Kacprowski, Tim and Baumbach, Jan and Haller, Dirk and List, Markus},
  year       = {2021},
  month      = jan,
  journal    = {Computational and Structural Biotechnology Journal},
  volume     = {19},
  pages      = {2687--2698},
  issn       = {2001-0370},
  doi        = {10.1016/j.csbj.2021.05.001},
  urldate    = {2025-06-16},
  abstract   = {Microorganisms including bacteria, fungi, viruses, protists and archaea live as communities in complex and contiguous environments. They engage in numerous inter- and intra- kingdom interactions which can be inferred from microbiome profiling data. In particular, network-based approaches have proven helpful in deciphering complex microbial interaction patterns. Here we give an overview of state-of-the-art methods to infer intra-kingdom interactions ranging from simple correlation- to complex conditional dependence-based methods. We highlight common biases encountered in microbial profiles and discuss mitigation strategies employed by different tools and their trade-off with increased computational complexity. Finally, we discuss current limitations that motivate further method development to infer inter-kingdom interactions and to robustly and comprehensively characterize microbial environments in the future.},
  keywords   = {/unread,Microbial co-occurrence networks,Microbial interactions,Network analysis,Trans-kingdom interactions},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-16T16:18:09.496Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/ZCY74M2I/Matchado et al. - 2021 - Network analysis methods for studying microbial communities A mini review.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/BKZN3MI5/S2001037021001823.html}
 }
--- a/suivi/2025-27/2025-27.qmd
+++ b/suivi/2025-27/2025-27.qmd
@ -0,0 +1,109 @@
 ---
 title: "Bilan semaine 27 2025 : 30 juin - 4 juillet"
 categories: [colBiSBM, inférence, GNN]
 date: 2025-06-30
 date-modified: last-modified
 bibliography: references.bib
 ---
 ## TODO List
 - Pour clustering de collections sur données réelles :    
    &rarr; L'intuition de Pierre semble être confirmée, les dissimilarités semblent arrêter de varier sensiblement pour de grandes valeurs $(Q_1,Q_2)$.
    - ❓Je n'arrive plus à reproduire le bug pour l'inférence...
    - S'assurer que ça marche et relancer
 - Creuser et explorer avec easy16s !
 - ✅ Ajouter le tableau de comparaison du VGAE avec colBiSBM
 - ⌛ **Calcul du score F1** Revérifier que j'entraîne correctement le VGAE car résultats de généralisation trop bons sur les autres réseaux Doré, ce qui est étonnant
 - Regarder la liste des cours du MathSV et de l'Université Paris-Saclay.
 - Dé-bugger les simulations :
    - ⌛ Inférence : Relancer simus d'inférence avec n = 240 pour voir si la qualité augmente (se rassurer). En fait on est déjà à 240, j'ai relancé avec M = 4 au lieu de M = 2.
    En attente résultats MIGALE -> BUG, dois creuser mais juste des problèmes techniques -> Visiblement il y a d'autres problèmes que juste le plan de parallélisation.
        - ⌛ Bon le bug ne se reproduit plus... les jobs sont juste trop longs (> 120h) j'ai relancé, il ne reste que 182/972 conditions.
 - ⌛ **Plutôt regarder pour introduire un modèle $\delta$-colBiSBM**. Kmeans sur la densité des réseaux subdoré pour pré-partitionner et *clusteriser*.
 Car densités déséquilibrées. 
 :::{#ref-kmeans-vae}
 - Faire GNN-VAE Doré et sub-Doré avec kmeans et clustering sur l'espace latent
 J'ai commencé à regarder un peu
 :::
 ### Inférence et microbes
 - Lancer *colBiSBM* sur $OTU\times Sample$ &rarr; problème du chargement en mémoire des données à voir
 - Lancer *colSBM* sur $OTU\times OTU$
 - ✅ Creuser [TabNet](https://raw.githubusercontent.com/cregouby/R-toulouse-tabnet/main/Tabnet_RR2023_fr_pdf.pdf) de Christophe Regouby
 - TabNet pratiquer les [exercices](https://github.com/cregouby/Tutoriel_torch)
 - Regarder **SPARTA** Rennes
 - Lire Papiers compositional data (Aitchison et al. intro)
 - Lire article multi-niveaux Saint-Clair
 - Ecrire et étudier les modèles pour différents niveaux taxonomiques.
 \begin{align*}
 i \rightarrow &~N^1_i \subseteq N^2_i \subseteq N^3_i & \text{Taxonomie}\\
 Z^0_i \overset{?}{=} & Z^1_i \overset{?}{=}  Z^2_i \overset{?}{=} Z^3_i & \text{Groupes fonctionnels}
 \end{align*}
 #### Causalité
 Plus sur le temps long, à regarder
 - GT causalité
 - Daria Bystrova lire présentation @bystrovaCausalDiscovery (Meek rules, V-structure)
 ## Lectures en cours 📚
 ### OT
 - ⌛ @mazeletUnsupervisedLearningOptimal Intéressant pour le transport optimal entre graphes de tailles différentes
 - ⌛ @nennaLecture2Entropic Pour comprendre le problème d'OT régularisé pour l'entropie.
 - ⌛ @nennaLecture1Monge
 ### Inférence de graphes
 - ✅ @Morton2021.11.09.467939 VAE with Multinomial Logistic Normal distribution using Isometric Log Ratio transform.
 Plus rapide que les autres méthodes et performances équivalentes
 - ⌛ @aitchisonStatisticalAnalysisCompositional1982a
 - ❗📖 @payneFiniteMixturesMultivariate2023 sur MixMPLN
 ### Causalité
 - ❗📖 @bystrovaCausalDiscovery
 ## A discuter
 ### Congés P&S
 - ✅ Quand est-ce qu'on ne se voit pas ? Et donc quand est-ce qu'on se voit après ?
 - ✅ Calendrier partagé
 ### Thèse
 - ✅ Que prévoir pour le CSI 
    - 👍 Un petit rapport
    - 👍 Une présentation
 - 👨‍🏫 **Demander à Pierre** Comment valider les enseignements comme formations Adum ?
 - ✅ Des recommandations de formations, voir les cours du MathSV
 ### Interprétation écologiques résultats de Baldock
 - ⌛ Point avec Elisa, **oui on relance**
 ### Inférence
 - pbs : variance, bcp de zéros, covariables, offset et taxonomie (Réseaux arêtes différents niveaux : Genre, OTU ...)
 > Combine networks at different taxonomic levels
 - Inférence + GREMLINS
--- a/suivi/2025-27/references.bib
+++ b/suivi/2025-27/references.bib
@ -0,0 +1,93 @@
@article{mazeletUnsupervisedLearningOptimal,
  title      = {Unsupervised {{Learning}} for {{Optimal Transport}} Plan Prediction between Unbalanced Graphs},
  author     = {Mazelet, Sonia and Flamary, Rémi and Thirion, Bertrand},
  abstract   = {Optimal transport between graphs, based on Gromov-Wasserstein and other extensions, is a powerful tool for comparing and aligning graph structures. However, solving the associated non-convex optimization problems is computationally expensive, which limits the scalability of these methods to large graphs. In this work, we present Unbalanced Learning of Optimal Transport (ULOT), a deep learning method that predicts optimal transport plans between two graphs. Our method is trained by minimizing the fused unbalanced Gromov-Wasserstein (FUGW) loss. We propose a novel neural architecture with cross-attention that is conditioned on the FUGW tradeoff hyperparameters. We evaluate ULOT on synthetic stochastic block model (SBM) graphs and on real cortical surface data obtained from fMRI. ULOT predicts transport plans with competitive loss up to two orders of magnitude faster than classical solvers. Furthermore, the predicted plan can be used as a warm start for classical solvers to accelerate their convergence. Finally, the predicted transport plan is fully differentiable with respect to the graph inputs and FUGW hyperparameters, enabling the optimization of functionals of the ULOT plan.},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-11T09:08:09.864Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/HPZEYMM9/Mazelet et al. - Unsupervised Learning for Optimal Transport plan prediction between unbalanced graphs.pdf}
 }
@article{nennaLecture2Entropic,
  title      = {Lecture 2: {{Entropic Optimal Transport}}},
  author     = {Nenna, Luca},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-11T16:06:28.547Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/WGFIISDB/Nenna - Lecture 2 Entropic Optimal Transport.pdf}
 }
@article{nennaLecture1Monge,
  title      = {Lecture 1 {{Monge}} and {{Kantorovich}} Problems: From Primal to Dual},
  author     = {Nenna, Luca},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-13T09:24:13.832Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/7LVQPD6D/Nenna - Lecture 1 Monge and Kantorovich problems from primal to dual.pdf}
 }
@article{Morton2021.11.09.467939,
  title        = {Scalable Estimation of Microbial Co-Occurrence Networks with {{Variational Autoencoders}}},
  author       = {Morton, James T. and Silverman, Justin and Tikhonov, Gleb and Lähdesmäki, Harri and Bonneau, Rich},
  date         = {2021},
  journaltitle = {bioRxiv : the preprint server for biology},
  shortjournal = {bioRxiv},
  eprint       = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939.full.pdf},
  publisher    = {Cold Spring Harbor Laboratory},
  doi          = {10.1101/2021.11.09.467939},
  url          = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939},
  abstract     = {Estimating microbe-microbe interactions is critical for understanding the ecological laws governing microbial communities. Rapidly decreasing sequencing costs have promised new opportunities to estimate microbe-microbe interactions across thousands of uncultured, unknown microbes. However, typical microbiome datasets are very high dimensional and accurate estimation of microbial correlations requires tens of thousands of samples, exceeding the computational capabilities of existing methodologies. Furthermore, the vast majority of microbiome studies collect compositional metagenomics data which enforces a negative bias when computing microbe-microbe correlations. The Multinomial Logistic Normal (MLN) distribution has been shown to be effective at inferring microbe-microbe correlations, however scalable Bayesian inference of these distributions has remained elusive. Here, we show that carefully constructed Variational Autoencoders (VAEs) augmented with the Isometric Log-ratio (ILR) transform can estimate low-rank MLN distributions thousands of times faster than existing methods. These VAEs can be trained on tens of thousands of samples, enabling co-occurrence inference across tens of thousands of microbes without regularization. The latent embedding distances computed from these VAEs are competitive with existing beta-diversity methods across a variety of mouse and human microbiome classification and regression tasks, with notable improvements on longitudinal studies.Competing Interest StatementThe authors have declared no competing interest.},
  elocation-id = {2021.11.09.467939},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-06-30T14:17:29.518Z}
 }
@article{aitchisonStatisticalAnalysisCompositional1982a,
  title        = {The {{Statistical Analysis}} of {{Compositional Data}}},
  author       = {Aitchison, J.},
  date         = {1982},
  journaltitle = {Journal of the Royal Statistical Society. Series B (Methodological)},
  volume       = {44},
  number       = {2},
  eprint       = {2345821},
  eprinttype   = {jstor},
  pages        = {139--177},
  publisher    = {[Royal Statistical Society, Oxford University Press]},
  issn         = {0035-9246},
  url          = {https://www.jstor.org/stable/2345821},
  urldate      = {2025-05-07},
  abstract     = {The simplex plays an important role as sample space in many practical situations where compositional data, in the form of proportions of some whole, require interpretation. It is argued that the statistical analysis of such data has proved difficult because of a lack both of concepts of independence and of rich enough parametric classes of distributions in the simplex. A variety of independence hypotheses are introduced and interrelated, and new classes of transformed-normal distributions in the simplex are provided as models within which the independence hypotheses can be tested through standard theory of parametric hypothesis testing. The new concepts and statistical methodology are illustrated by a number of applications.},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-05-07T07:43:38.485Z},
  file         = {/home/louis/snap/zotero-snap/common/Zotero/storage/S97URH4Y/Aitchison - 1982 - The Statistical Analysis of Compositional Data.pdf}
 }
@online{payneFiniteMixturesMultivariate2023,
  title       = {Finite {{Mixtures}} of {{Multivariate Poisson-Log Normal Factor Analyzers}} for {{Clustering Count Data}}},
  author      = {Payne, Andrea and Silva, Anjali and Rothstein, Steven J. and McNicholas, Paul D. and Subedi, Sanjeena},
  date        = {2023-11-13},
  eprint      = {2311.07762},
  eprinttype  = {arXiv},
  eprintclass = {stat},
  doi         = {10.48550/arXiv.2311.07762},
  url         = {http://arxiv.org/abs/2311.07762},
  urldate     = {2025-07-02},
  abstract    = {A mixture of multivariate Poisson-log normal factor analyzers is introduced by imposing constraints on the covariance matrix, which resulted in flexible models for clustering purposes. In particular, a class of eight parsimonious mixture models based on the mixtures of factor analyzers model are introduced. Variational Gaussian approximation is used for parameter estimation, and information criteria are used for model selection. The proposed models are explored in the context of clustering discrete data arising from RNA sequencing studies. Using real and simulated data, the models are shown to give favourable clustering performance. The GitHub R package for this work is available at https://github.com/anjalisilva/mixMPLNFA and is released under the open-source MIT license.},
  pubstate    = {prepublished},
  keywords    = {/unread,Statistics - Computation,Statistics - Machine Learning,Statistics - Methodology},
  annotation  = {Read\_Status: New\\
                 Read\_Status\_Date: 2025-07-02T09:31:47.579Z},
  file        = {/home/louis/snap/zotero-snap/common/Zotero/storage/BXVPEIDD/Payne et al. - 2023 - Finite Mixtures of Multivariate Poisson-Log Normal Factor Analyzers for Clustering Count Data.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/L5DAS5C2/2311.html}
 }
@unpublished{bystrovaCausalDiscovery,
  title      = {Causal Discovery},
  author     = {Bystrova, Daria},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-07-02T09:34:39.476Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/NQE5DY92/Bystrova - Causal discovery.pdf}
 }
--- a/suivi/2025-28/2025-28.qmd
+++ b/suivi/2025-28/2025-28.qmd
@ -0,0 +1,141 @@
 ---
 title: "Bilan semaine 28 2025 : 07 juillet - 11 juillet"
 categories: [colBiSBM, inférence, GNN]
 date: 2025-07-07
 date-modified: 2025-07-11
 bibliography: references.bib
 ---
 ## TODO List
 - Pour clustering de collections sur données ~~réelles~~ :    
    &rarr; L'intuition de Pierre semble être confirmée, les dissimilarités semblent arrêter de varier sensiblement pour de grandes valeurs $(Q_1,Q_2)$.
    - ❓Je n'arrive plus à reproduire le bug pour l'inférence...
    - S'assurer que ça marche et relancer
 - Creuser et explorer avec easy16s !
 - ⌛ **Calcul du score F1** Revérifier que j'entraîne correctement le VGAE car résultats de généralisation trop bons sur les autres réseaux Doré, ce qui est étonnant
 - Ajouter au tableau comparatif sep BiSBM
 - Regarder les codes Mangal database pour $\delta$
 - ✅ Formules ci-dessous. Voir $\delta$ mais additif
 :::{.callout-note}
 ### $\delta$ additif Bernoulli
 En Bernoulli pas de forme analytique non plus :
 Pour $\alpha_{qr}$:
 $$ \sum_{m=1}^M \sum_{i=1}^{n_1^m} \sum_{j=1}^{n_2^m} \tau_{iq}^{1,m}\tau_{jr}^{2,m}(\frac{X_{ij}^m}{\alpha_{qr}} + \frac{(1-X_{ij}^m)}{\alpha_{qr} + \delta_m -1}) = 0$$
 $$\Leftrightarrow \sum_m \left( \frac{e^m_{qr}}{\alpha_{qr}} + \frac{n^m_{qr}-e^m_{qr}}{\alpha_{qr}+\delta_m-1} \right) = 0$$
 Et pour $\delta_m$:
 $$ \sum_{i=1}^{n_1^m} \sum_{j=1}^{n_2^m} \sum_{q=1}^{Q_1} \sum_{r=1}^{Q_2} \tau_{iq}^{1,m}\tau_{jr}^{2,m}(\frac{X_{ij}^m}{\delta_{m}} + \frac{(1-X_{ij}^m)}{\alpha_{qr} + \delta_m -1}) = 0$$
 :::
 :::{.callout-note}
 ### $\delta$ additif Poisson
 Forme analytique mais risque de confusion ?
 $$\widehat{\delta_m} = \frac{\sum_{q,r} e^m_{qr}}{\sum_{q,r} n^m_{qr}},~\widehat{\alpha_{qr}} = \frac{\sum_{m} e^m_{qr}}{\sum_{m} n^m_{qr}} $$
 :::
 - Attente retour Pierre pour faire d'autres clustering
 - ✅ Implémenter décodeur Generalized Random Dot Product.
 - ✅ Réimplémentation propre et évolutive du DeepBVGAE (suivi des guidelines PyTorch Geometric)
 - Vérifier s'il n'y a pas de data leakage (ie je prends aussi les données de val et de test pour prédire ?)
 - Dé-bugger les simulations :
    - ✅ Inférence : Relancer simus d'inférence avec n = 240 pour voir si la qualité augmente (se rassurer). En fait on est déjà à 240, j'ai relancé avec M = 4 au lieu de M = 2.
    En attente résultats MIGALE -> BUG, dois creuser mais juste des problèmes techniques -> Visiblement il y a d'autres problèmes que juste le plan de parallélisation.
 - ⌛ **Plutôt regarder pour introduire un modèle $\delta$-colBiSBM**. 
    - Ajouter le produit par $\delta$ là où nécessaire
    - Ajouter les modèles $\delta$, $\delta\pi, \dots$ et les blocs conditionnels
    - Ajouter les tests unitaires adéquats et les vérifier
 - 🛑**D'abord je lis la biblio dessus** Regarder *Largest gap* sur réseaux Doré
 - Essayer *clustering* sur `supinfo`
 - Homogénéiser notations dans les supplementaries
 :::{#ref-kmeans-vae}
 - Faire GNN-VAE Doré et sub-Doré avec kmeans et clustering sur l'espace latent
 J'ai commencé à regarder un peu
 :::
 ### Inférence et microbes
 - Lancer *colBiSBM* sur $OTU\times Sample$ &rarr; problème du chargement en mémoire des données à voir
 - Lancer *colSBM* sur $OTU\times OTU$
 - TabNet pratiquer les [exercices](https://github.com/cregouby/Tutoriel_torch)
 - Regarder **SPARTA** Rennes
 - Lire Papiers compositional data (Aitchison et al. intro)
 - Lire article multi-niveaux Saint-Clair
 - Ecrire et étudier les modèles pour différents niveaux taxonomiques.
 \begin{align*}
 i \rightarrow &~N^1_i \subseteq N^2_i \subseteq N^3_i & \text{Taxonomie}\\
 Z^0_i \overset{?}{=} & Z^1_i \overset{?}{=}  Z^2_i \overset{?}{=} Z^3_i & \text{Groupes fonctionnels}
 \end{align*}
 #### Causalité
 Plus sur le temps long, à regarder
 - GT causalité
 - Daria Bystrova lire présentation @bystrovaCausalDiscovery (Meek rules, V-structure)
 ## Biblio à faire
 - Regarder Transport optimal graphes bipartite.
 - HDR VB, chapitre de modèle à blocs latents, bcp travaillé sur bipartite
 OT, comparaison clustering, adaptation ARI, *Largest Gap*
 ## Lectures en cours 📚
 ### OT
 - ⌛ @mazeletUnsupervisedLearningOptimal Intéressant pour le transport optimal entre graphes de tailles différentes | Regarder si régularisation entropique ne marche pas bien pour le graphe.
 - ⌛ @nennaLecture2Entropic Pour comprendre le problème d'OT régularisé pour l'entropie.
 - ⌛ @nennaLecture1Monge
 ### Inférence de graphes
 - ⌛ @aitchisonStatisticalAnalysisCompositional1982a, en cours
 - ❗📖 @payneFiniteMixturesMultivariate2023 sur MixMPLN
 ### Causalité
 - ❗📖 @bystrovaCausalDiscovery
 ### Largest Gaps
 - ✅ @braultGeneralisationLalgorithmeLargest petit résumé de l'algo de @braultFastConsistentAlgorithm2023
 - ❗📖 @braultFastConsistentAlgorithm2023
 - ❗📖 @channarondClassificationEstimationStochastic2012 le papier qui introduit le *Largest Gaps*
 ## A discuter
 ### Congés P&S
 ### Thèse
 - Faire préz CSI
 - Faire rapport CSI
 ### Interprétation écologiques résultats de Baldock
 - ⌛ Point avec Elisa, **oui on relance**
 ### Inférence
 - pbs : variance, bcp de zéros, covariables, offset et taxonomie (Réseaux arêtes différents niveaux : Genre, OTU ...)
 > Combine networks at different taxonomic levels
 - Inférence + GREMLINS
--- a/suivi/2025-28/references.bib
+++ b/suivi/2025-28/references.bib
@ -0,0 +1,144 @@
@article{mazeletUnsupervisedLearningOptimal,
  title      = {Unsupervised {{Learning}} for {{Optimal Transport}} Plan Prediction between Unbalanced Graphs},
  author     = {Mazelet, Sonia and Flamary, Rémi and Thirion, Bertrand},
  abstract   = {Optimal transport between graphs, based on Gromov-Wasserstein and other extensions, is a powerful tool for comparing and aligning graph structures. However, solving the associated non-convex optimization problems is computationally expensive, which limits the scalability of these methods to large graphs. In this work, we present Unbalanced Learning of Optimal Transport (ULOT), a deep learning method that predicts optimal transport plans between two graphs. Our method is trained by minimizing the fused unbalanced Gromov-Wasserstein (FUGW) loss. We propose a novel neural architecture with cross-attention that is conditioned on the FUGW tradeoff hyperparameters. We evaluate ULOT on synthetic stochastic block model (SBM) graphs and on real cortical surface data obtained from fMRI. ULOT predicts transport plans with competitive loss up to two orders of magnitude faster than classical solvers. Furthermore, the predicted plan can be used as a warm start for classical solvers to accelerate their convergence. Finally, the predicted transport plan is fully differentiable with respect to the graph inputs and FUGW hyperparameters, enabling the optimization of functionals of the ULOT plan.},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-11T09:08:09.864Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/HPZEYMM9/Mazelet et al. - Unsupervised Learning for Optimal Transport plan prediction between unbalanced graphs.pdf}
 }
@article{nennaLecture2Entropic,
  title      = {Lecture 2: {{Entropic Optimal Transport}}},
  author     = {Nenna, Luca},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-11T16:06:28.547Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/WGFIISDB/Nenna - Lecture 2 Entropic Optimal Transport.pdf}
 }
@article{nennaLecture1Monge,
  title      = {Lecture 1 {{Monge}} and {{Kantorovich}} Problems: From Primal to Dual},
  author     = {Nenna, Luca},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-13T09:24:13.832Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/7LVQPD6D/Nenna - Lecture 1 Monge and Kantorovich problems from primal to dual.pdf}
 }
@article{Morton2021.11.09.467939,
  title        = {Scalable Estimation of Microbial Co-Occurrence Networks with {{Variational Autoencoders}}},
  author       = {Morton, James T. and Silverman, Justin and Tikhonov, Gleb and Lähdesmäki, Harri and Bonneau, Rich},
  date         = {2021},
  journaltitle = {bioRxiv : the preprint server for biology},
  shortjournal = {bioRxiv},
  eprint       = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939.full.pdf},
  publisher    = {Cold Spring Harbor Laboratory},
  doi          = {10.1101/2021.11.09.467939},
  url          = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939},
  abstract     = {Estimating microbe-microbe interactions is critical for understanding the ecological laws governing microbial communities. Rapidly decreasing sequencing costs have promised new opportunities to estimate microbe-microbe interactions across thousands of uncultured, unknown microbes. However, typical microbiome datasets are very high dimensional and accurate estimation of microbial correlations requires tens of thousands of samples, exceeding the computational capabilities of existing methodologies. Furthermore, the vast majority of microbiome studies collect compositional metagenomics data which enforces a negative bias when computing microbe-microbe correlations. The Multinomial Logistic Normal (MLN) distribution has been shown to be effective at inferring microbe-microbe correlations, however scalable Bayesian inference of these distributions has remained elusive. Here, we show that carefully constructed Variational Autoencoders (VAEs) augmented with the Isometric Log-ratio (ILR) transform can estimate low-rank MLN distributions thousands of times faster than existing methods. These VAEs can be trained on tens of thousands of samples, enabling co-occurrence inference across tens of thousands of microbes without regularization. The latent embedding distances computed from these VAEs are competitive with existing beta-diversity methods across a variety of mouse and human microbiome classification and regression tasks, with notable improvements on longitudinal studies.Competing Interest StatementThe authors have declared no competing interest.},
  elocation-id = {2021.11.09.467939},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-06-30T14:17:29.518Z}
 }
@article{aitchisonStatisticalAnalysisCompositional1982a,
  title        = {The {{Statistical Analysis}} of {{Compositional Data}}},
  author       = {Aitchison, J.},
  date         = {1982},
  journaltitle = {Journal of the Royal Statistical Society. Series B (Methodological)},
  volume       = {44},
  number       = {2},
  eprint       = {2345821},
  eprinttype   = {jstor},
  pages        = {139--177},
  publisher    = {[Royal Statistical Society, Oxford University Press]},
  issn         = {0035-9246},
  url          = {https://www.jstor.org/stable/2345821},
  urldate      = {2025-05-07},
  abstract     = {The simplex plays an important role as sample space in many practical situations where compositional data, in the form of proportions of some whole, require interpretation. It is argued that the statistical analysis of such data has proved difficult because of a lack both of concepts of independence and of rich enough parametric classes of distributions in the simplex. A variety of independence hypotheses are introduced and interrelated, and new classes of transformed-normal distributions in the simplex are provided as models within which the independence hypotheses can be tested through standard theory of parametric hypothesis testing. The new concepts and statistical methodology are illustrated by a number of applications.},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-05-07T07:43:38.485Z},
  file         = {/home/louis/snap/zotero-snap/common/Zotero/storage/S97URH4Y/Aitchison - 1982 - The Statistical Analysis of Compositional Data.pdf}
 }
@online{payneFiniteMixturesMultivariate2023,
  title       = {Finite {{Mixtures}} of {{Multivariate Poisson-Log Normal Factor Analyzers}} for {{Clustering Count Data}}},
  author      = {Payne, Andrea and Silva, Anjali and Rothstein, Steven J. and McNicholas, Paul D. and Subedi, Sanjeena},
  date        = {2023-11-13},
  eprint      = {2311.07762},
  eprinttype  = {arXiv},
  eprintclass = {stat},
  doi         = {10.48550/arXiv.2311.07762},
  url         = {http://arxiv.org/abs/2311.07762},
  urldate     = {2025-07-02},
  abstract    = {A mixture of multivariate Poisson-log normal factor analyzers is introduced by imposing constraints on the covariance matrix, which resulted in flexible models for clustering purposes. In particular, a class of eight parsimonious mixture models based on the mixtures of factor analyzers model are introduced. Variational Gaussian approximation is used for parameter estimation, and information criteria are used for model selection. The proposed models are explored in the context of clustering discrete data arising from RNA sequencing studies. Using real and simulated data, the models are shown to give favourable clustering performance. The GitHub R package for this work is available at https://github.com/anjalisilva/mixMPLNFA and is released under the open-source MIT license.},
  pubstate    = {prepublished},
  keywords    = {/unread,Statistics - Computation,Statistics - Machine Learning,Statistics - Methodology},
  annotation  = {Read\_Status: New\\
                 Read\_Status\_Date: 2025-07-02T09:31:47.579Z},
  file        = {/home/louis/snap/zotero-snap/common/Zotero/storage/BXVPEIDD/Payne et al. - 2023 - Finite Mixtures of Multivariate Poisson-Log Normal Factor Analyzers for Clustering Count Data.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/L5DAS5C2/2311.html}
 }
@unpublished{bystrovaCausalDiscovery,
  title      = {Causal Discovery},
  author     = {Bystrova, Daria},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-07-02T09:34:39.476Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/NQE5DY92/Bystrova - Causal discovery.pdf}
 }
@online{braultFastConsistentAlgorithm2023,
  title       = {Fast and {{Consistent Algorithm}} for the {{Latent Block Model}}},
  author      = {Brault, Vincent and Channarond, Antoine},
  date        = {2023-03-09},
  eprint      = {1610.09005},
  eprinttype  = {arXiv},
  eprintclass = {math},
  doi         = {10.48550/arXiv.1610.09005},
  url         = {http://arxiv.org/abs/1610.09005},
  urldate     = {2025-07-09},
  abstract    = {The latent block model is used to simultaneously rank the rows and columns of a matrix to reveal a block structure. The algorithms used for estimation are often time consuming. However, recent work shows that the log-likelihood ratios are equivalent under the complete and observed (with unknown labels) models and the groups posterior distribution to converge as the size of the data increases to a Dirac mass located at the actual groups configuration. Based on these observations, the algorithm Largest Gaps is proposed in this paper to perform clustering using only the marginals of the matrix, when the number of blocks is very small with respect to the size of the whole matrix in the case of binary data. In addition, a model selection method is incorporated with a proof of its consistency. Thus, this paper shows that studying simplistic configurations (few blocks compared to the size of the matrix or very contrasting blocks) with complex algorithms is useless since the marginals already give very good parameter and classification estimates.},
  langid      = {english},
  pubstate    = {prepublished},
  keywords    = {/unread,Mathematics - Statistics Theory,Statistics - Computation,Statistics - Statistics Theory},
  annotation  = {Read\_Status: New\\
                 Read\_Status\_Date: 2025-07-09T13:58:53.533Z},
  file        = {/home/louis/snap/zotero-snap/common/Zotero/storage/5LXC6Y68/Brault et Channarond - 2023 - Fast and Consistent Algorithm for the Latent Block Model.pdf}
 }
@article{braultGeneralisationLalgorithmeLargest,
  title      = {Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique},
  author     = {Brault, Vincent and Channarond, Antoine and Robert, Valérie},
  abstract   = {The latent block model assumes there exists a distribution for each crossing between an object cluster and a variable cluster of a data table ; the cells are supposed to be independent conditionally to the choice of these clusters. To estimate the model parameters, most of algorithms are time consuming. Brault and Channarond (2016) proposed to adapt the Largest Gaps algorithm which consists in using the margins. They thus obtained a procedure which estimates all the model parameters consistently but requires a large number of observations. In this talk, we will extend the procedure to the case of any distribution having a second order moment by using an EM algorithm estimation.},
  langid     = {french},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-07-09T12:29:43.098Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/GIUNC4L3/Brault et al. - Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique.pdf}
 }
@article{channarondClassificationEstimationStochastic2012,
  title        = {Classification and Estimation in the {{Stochastic Blockmodel}} Based on the Empirical Degrees},
  author       = {Channarond, Antoine and Daudin, Jean-Jacques and Robin, Stéphane},
  date         = {2012-01-01},
  journaltitle = {Electronic Journal of Statistics},
  shortjournal = {Electron. J. Statist.},
  volume       = {6},
  publisher    = {Institute of Mathematical Statistics},
  issn         = {1935-7524},
  doi          = {10.1214/12-ejs753},
  url          = {https://projecteuclid.org/journals/electronic-journal-of-statistics/volume-6/issue-none/Classification-and-estimation-in-the-Stochastic-Blockmodel-based-on-the/10.1214/12-EJS753.full},
  urldate      = {2025-07-09},
  abstract     = {The Stochastic Blockmodel [16] is a mixture model for heterogeneous network data. Unlike the usual statistical framework, new nodes give additional information about the previous ones in this model. Thereby the distribution of the degrees concentrates in points conditionally on the node class. We show under a mild assumption that classification, estimation and model selection can actually be achieved with no more than the empirical degree data. We provide an algorithm able to process very large networks and consistent estimators based on it. In particular, we prove a bound of the probability of misclassification of at least one node, including when the number of classes grows.},
  issue        = {none},
  langid       = {english},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-07-09T13:59:33.921Z},
  file         = {/home/louis/snap/zotero-snap/common/Zotero/storage/8TL8AJ2G/Channarond et al. - 2012 - Classification and estimation in the Stochastic Blockmodel based on the empirical degrees.pdf}
 }
--- a/suivi/2025-29/2025-29.qmd
+++ b/suivi/2025-29/2025-29.qmd
@ -0,0 +1,478 @@
 ---
 title: "Bilan semaine 29 2025 : 15 juillet - 18 juillet"
 categories: [colBiSBM, inférence, GNN]
 date: 2025-07-15
 date-modified: last-modified
 bibliography: references.bib
 ---
 ## TODO List
 - Pour clustering de collections sur données ~~réelles~~ :    
    &rarr; L'intuition de Pierre semble être confirmée, les dissimilarités semblent arrêter de varier sensiblement pour de grandes valeurs $(Q_1,Q_2)$.
    - ❓Je n'arrive plus à reproduire le bug pour l'inférence...
    - 😫 bug encore. S'assurer que ça marche et relancer
 - ⌛ **En fait je donne tous les degrés donc le GNN a juste à retrouver les arêtes non vues**. Revérifier que j'entraîne correctement le VGAE car résultats de généralisation trop bons sur les autres réseaux Doré, ce qui est étonnant.
 Pour corriger cet effet :
    - Donner la matrice identité comme features
    - Corriger les degrés calculés.
 - ✅ Ajouter au tableau comparatif sep BiSBM
 - Pour s'assurer que colBiSBM marche, il faut comparer avec une proportion de :
    - *Missing links*, ie des faux zéros
    - *NA* en *Missing at random (MAR)*
 - Faibles performances de l'inférence :
    - Vérifier que les conditions d'identifiabilité des modèles fautifs sont bien remplies.
    - Récupérer des jeux de paramètres et essayer de reproduire les résultats.
 - Clustering sur Doré :
    - Désagréger les réseaux et relancer le clustering sur certains auteurs.
    - Regarder pour les couples date+nom les études et le nombre de réseaux analysables (Possible demander à Élisa)
    - Clusteriser sur la base des noms et voir parmi les réseaux Européens (désagrégés ?)
    - Si M > 10, alors voir si je retrouve les mêmes résultats que dans les études.
 <table>
 <caption>AUC values for colBiSBM, sep-BiSBM and VGAE models across cities</caption>
 <thead>
 <tr>
 <th style="empty-cells: hide;border-bottom:hidden;" colspan="1"></th>
 <th style="border-bottom:hidden;padding-bottom:0; padding-left:3px;padding-right:3px;text-align: center; " colspan="3"><div style="border-bottom: 1px solid #ddd; padding-bottom: 5px; ">AUC</div></th>
 </tr>
  <tr>
   <th style="text-align:left;"> City </th>
   <th style="text-align:right;"> colBiSBM </th>
   <th style="text-align:right;"> sep-BiSBM </th>
   <th style="text-align:right;"> Untuned VGAE </th>
  </tr>
 </thead>
 <tbody>
  <tr>
   <td style="text-align:left;"> Bristol </td>
   <td style="text-align:right;"> 0.841 </td>
   <td style="text-align:right;"> 0.824 </td>
   <td style="text-align:right;"> 1 </td>
  </tr>
  <tr>
   <td style="text-align:left;"> Edinburgh </td>
   <td style="text-align:right;"> 0.882 </td>
   <td style="text-align:right;"> 0.883 </td>
   <td style="text-align:right;"> 1 </td>
  </tr>
  <tr>
   <td style="text-align:left;"> Leeds </td>
   <td style="text-align:right;"> 0.873 </td>
   <td style="text-align:right;"> 0.852 </td>
   <td style="text-align:right;"> 1 </td>
  </tr>
  <tr>
   <td style="text-align:left;"> Reading </td>
   <td style="text-align:right;"> 0.845 </td>
   <td style="text-align:right;"> 0.837 </td>
   <td style="text-align:right;"> 1 </td>
  </tr>
 </tbody>
 </table>
 - Regarder les codes Mangal database pour $\delta$
 - Voir $\delta$ mais additif
 :::{.callout-note}
 ### $\delta$ additif Bernoulli
 En Bernoulli pas de forme analytique non plus :
 Pour $\alpha_{qr}$:
 $$ \sum_{m=1}^M \sum_{i=1}^{n_1^m} \sum_{j=1}^{n_2^m} \tau_{iq}^{1,m}\tau_{jr}^{2,m}(\frac{X_{ij}^m}{\alpha_{qr}} + \frac{(1-X_{ij}^m)}{\alpha_{qr} + \delta_m -1}) = 0$$
 $$\Leftrightarrow \sum_m \frac{e^m_{qr}}{\alpha_{qr}} + \frac{1}{\alpha_{qr}+\delta_m-1} (n^m_{qr}-e^m_{qr}) = 0$$
 Et pour $\delta_m$:
 $$ \sum_{i=1}^{n_1^m} \sum_{j=1}^{n_2^m} \sum_{q=1}^{Q_1} \sum_{r=1}^{Q_2} \tau_{iq}^{1,m}\tau_{jr}^{2,m}(\frac{X_{ij}^m}{\delta_{m}} + \frac{(1-X_{ij}^m)}{\alpha_{qr} + \delta_m -1}) = 0$$
 :::
 :::{.callout-note}
 ### $\delta$ additif Poisson
 Forme analytique mais risque de confusion ?
 $$\widehat{\delta_m} = \frac{\sum_{q,r} e^m_{qr}}{\sum_{q,r} n^m_{qr}},~\widehat{\alpha_{qr}} = \frac{\sum_{m} e^m_{qr}}{\sum_{m} n^m_{qr}} $$
 :::
 - Attente retour Pierre pour faire d'autres clustering
 - Regarder la liste des cours du MathSV et de l'Université Paris-Saclay.
 - Inférence finie mais résultats pas fous:
 <table class="table" style="font-size: 10px; margin-left: auto; margin-right: auto;">
 <caption style="font-size: initial !important;">The proportion of datasets where the correct number of blocks is selected.</caption>
 <thead>
 <tr>
 <th style="empty-cells: hide;border-bottom:hidden;" colspan="1"></th>
 <th style="border-bottom:hidden;padding-bottom:0; padding-left:0px;padding-right:0px;text-align: center; " colspan="6"><div style="border-bottom: 1px solid #ddd; padding-bottom: 5px; ">iid</div></th>
 <th style="border-bottom:hidden;padding-bottom:0; padding-left:0px;padding-right:0px;text-align: center; " colspan="6"><div style="border-bottom: 1px solid #ddd; padding-bottom: 5px; ">$\pi$</div></th>
 <th style="border-bottom:hidden;padding-bottom:0; padding-left:0px;padding-right:0px;text-align: center; " colspan="6"><div style="border-bottom: 1px solid #ddd; padding-bottom: 5px; ">$\rho$</div></th>
 <th style="border-bottom:hidden;padding-bottom:0; padding-left:0px;padding-right:0px;text-align: center; " colspan="6"><div style="border-bottom: 1px solid #ddd; padding-bottom: 5px; ">$\pi\rho$</div></th>
 </tr>
  <tr>
   <th style="text-align:right;"> $\epsilon_{\alpha}$ </th>
   <th style="text-align:left;"> $\bm{1}_{\widehat{Q_1} \lt 4}$ </th>
   <th style="text-align:left;"> $\bm{1}_{\widehat{Q_1} = 4}$ </th>
   <th style="text-align:left;"> $\bm{1}_{\widehat{Q_1} \gt 4}$ </th>
   <th style="text-align:left;"> $\bm{1}_{\widehat{Q_2} \lt 4}$ </th>
   <th style="text-align:left;"> $\bm{1}_{\widehat{Q_2} = 4}$ </th>
   <th style="text-align:left;"> $\bm{1}_{\widehat{Q_2} \gt 4}$ </th>
   <th style="text-align:left;"> $\bm{1}_{\widehat{Q_1} \lt 4}$ </th>
   <th style="text-align:left;"> $\bm{1}_{\widehat{Q_1} = 4}$ </th>
   <th style="text-align:left;"> $\bm{1}_{\widehat{Q_1} \gt 4}$ </th>
   <th style="text-align:left;"> $\bm{1}_{\widehat{Q_2} \lt 4}$ </th>
   <th style="text-align:left;"> $\bm{1}_{\widehat{Q_2} = 4}$ </th>
   <th style="text-align:left;"> $\bm{1}_{\widehat{Q_2} \gt 4}$ </th>
   <th style="text-align:left;"> $\bm{1}_{\widehat{Q_1} \lt 4}$ </th>
   <th style="text-align:left;"> $\bm{1}_{\widehat{Q_1} = 4}$ </th>
   <th style="text-align:left;"> $\bm{1}_{\widehat{Q_1} \gt 4}$ </th>
   <th style="text-align:left;"> $\bm{1}_{\widehat{Q_2} \lt 4}$ </th>
   <th style="text-align:left;"> $\bm{1}_{\widehat{Q_2} = 4}$ </th>
   <th style="text-align:left;"> $\bm{1}_{\widehat{Q_2} \gt 4}$ </th>
   <th style="text-align:left;"> $\bm{1}_{\widehat{Q_1} \lt 4}$ </th>
   <th style="text-align:left;"> $\bm{1}_{\widehat{Q_1} = 4}$ </th>
   <th style="text-align:left;"> $\bm{1}_{\widehat{Q_1} \gt 4}$ </th>
   <th style="text-align:left;"> $\bm{1}_{\widehat{Q_2} \lt 4}$ </th>
   <th style="text-align:left;"> $\bm{1}_{\widehat{Q_2} = 4}$ </th>
   <th style="text-align:left;"> $\bm{1}_{\widehat{Q_2} \gt 4}$ </th>
  </tr>
 </thead>
 <tbody>
  <tr>
   <td style="text-align:right;"> 0.00 </td>
   <td style="text-align:left;"> 1 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 1 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 1 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 1 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 1 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 1 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 1 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 1 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0 </td>
  </tr>
  <tr>
   <td style="text-align:right;"> 0.03 </td>
   <td style="text-align:left;"> 1 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 1 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 1 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 1 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 1 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 1 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 1 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 1 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0 </td>
  </tr>
  <tr>
   <td style="text-align:right;"> 0.06 </td>
   <td style="text-align:left;"> 0.19 $\pm$ 0.04 </td>
   <td style="text-align:left;"> 0.81 $\pm$ 0.04 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.24 $\pm$ 0.04 </td>
   <td style="text-align:left;"> 0.76 $\pm$ 0.04 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.02 $\pm$ 0.01 </td>
   <td style="text-align:left;"> 0.33 $\pm$ 0.05 </td>
   <td style="text-align:left;"> 0.65 $\pm$ 0.05 </td>
   <td style="text-align:left;"> 0.26 $\pm$ 0.04 </td>
   <td style="text-align:left;"> 0.74 $\pm$ 0.04 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.17 $\pm$ 0.04 </td>
   <td style="text-align:left;"> 0.83 $\pm$ 0.04 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.02 $\pm$ 0.01 </td>
   <td style="text-align:left;"> 0.2 $\pm$ 0.04 </td>
   <td style="text-align:left;"> 0.78 $\pm$ 0.04 </td>
   <td style="text-align:left;"> 0.06 $\pm$ 0.02 </td>
   <td style="text-align:left;"> 0.87 $\pm$ 0.03 </td>
   <td style="text-align:left;"> 0.06 $\pm$ 0.02 </td>
   <td style="text-align:left;"> 0.01 $\pm$ 0.01 </td>
   <td style="text-align:left;"> 0.88 $\pm$ 0.03 </td>
   <td style="text-align:left;"> 0.11 $\pm$ 0.03 </td>
  </tr>
  <tr>
   <td style="text-align:right;"> 0.09 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.94 $\pm$ 0.02 </td>
   <td style="text-align:left;"> 0.06 $\pm$ 0.02 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.91 $\pm$ 0.03 </td>
   <td style="text-align:left;"> 0.09 $\pm$ 0.03 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.1 $\pm$ 0.03 </td>
   <td style="text-align:left;"> 0.9 $\pm$ 0.03 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 1 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 1 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.13 $\pm$ 0.03 </td>
   <td style="text-align:left;"> 0.87 $\pm$ 0.03 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.87 $\pm$ 0.03 </td>
   <td style="text-align:left;"> 0.13 $\pm$ 0.03 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.87 $\pm$ 0.03 </td>
   <td style="text-align:left;"> 0.13 $\pm$ 0.03 </td>
  </tr>
  <tr>
   <td style="text-align:right;"> 0.12 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.94 $\pm$ 0.02 </td>
   <td style="text-align:left;"> 0.06 $\pm$ 0.02 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.91 $\pm$ 0.03 </td>
   <td style="text-align:left;"> 0.09 $\pm$ 0.03 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.26 $\pm$ 0.04 </td>
   <td style="text-align:left;"> 0.74 $\pm$ 0.04 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 1 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 1 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.3 $\pm$ 0.04 </td>
   <td style="text-align:left;"> 0.7 $\pm$ 0.04 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.84 $\pm$ 0.04 </td>
   <td style="text-align:left;"> 0.16 $\pm$ 0.04 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.83 $\pm$ 0.04 </td>
   <td style="text-align:left;"> 0.17 $\pm$ 0.04 </td>
  </tr>
  <tr>
   <td style="text-align:right;"> 0.15 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.85 $\pm$ 0.03 </td>
   <td style="text-align:left;"> 0.15 $\pm$ 0.03 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.86 $\pm$ 0.03 </td>
   <td style="text-align:left;"> 0.14 $\pm$ 0.03 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.34 $\pm$ 0.05 </td>
   <td style="text-align:left;"> 0.66 $\pm$ 0.05 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 1 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 1 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.3 $\pm$ 0.04 </td>
   <td style="text-align:left;"> 0.7 $\pm$ 0.04 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.81 $\pm$ 0.04 </td>
   <td style="text-align:left;"> 0.19 $\pm$ 0.04 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.8 $\pm$ 0.04 </td>
   <td style="text-align:left;"> 0.2 $\pm$ 0.04 </td>
  </tr>
  <tr>
   <td style="text-align:right;"> 0.18 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.87 $\pm$ 0.03 </td>
   <td style="text-align:left;"> 0.13 $\pm$ 0.03 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.84 $\pm$ 0.04 </td>
   <td style="text-align:left;"> 0.16 $\pm$ 0.04 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.36 $\pm$ 0.05 </td>
   <td style="text-align:left;"> 0.64 $\pm$ 0.05 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 1 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 1 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.35 $\pm$ 0.05 </td>
   <td style="text-align:left;"> 0.65 $\pm$ 0.05 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.87 $\pm$ 0.03 </td>
   <td style="text-align:left;"> 0.13 $\pm$ 0.03 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.82 $\pm$ 0.04 </td>
   <td style="text-align:left;"> 0.18 $\pm$ 0.04 </td>
  </tr>
  <tr>
   <td style="text-align:right;"> 0.21 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.92 $\pm$ 0.03 </td>
   <td style="text-align:left;"> 0.08 $\pm$ 0.03 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.89 $\pm$ 0.03 </td>
   <td style="text-align:left;"> 0.11 $\pm$ 0.03 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.4 $\pm$ 0.05 </td>
   <td style="text-align:left;"> 0.6 $\pm$ 0.05 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 1 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 1 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.39 $\pm$ 0.05 </td>
   <td style="text-align:left;"> 0.61 $\pm$ 0.05 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.84 $\pm$ 0.04 </td>
   <td style="text-align:left;"> 0.16 $\pm$ 0.04 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.84 $\pm$ 0.04 </td>
   <td style="text-align:left;"> 0.16 $\pm$ 0.04 </td>
  </tr>
  <tr>
   <td style="text-align:right;"> 0.24 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.88 $\pm$ 0.03 </td>
   <td style="text-align:left;"> 0.12 $\pm$ 0.03 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.85 $\pm$ 0.03 </td>
   <td style="text-align:left;"> 0.15 $\pm$ 0.03 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.47 $\pm$ 0.05 </td>
   <td style="text-align:left;"> 0.53 $\pm$ 0.05 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 1 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.99 $\pm$ 0.01 </td>
   <td style="text-align:left;"> 0.01 $\pm$ 0.01 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.4 $\pm$ 0.05 </td>
   <td style="text-align:left;"> 0.6 $\pm$ 0.05 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.85 $\pm$ 0.03 </td>
   <td style="text-align:left;"> 0.15 $\pm$ 0.03 </td>
   <td style="text-align:left;"> 0 </td>
   <td style="text-align:left;"> 0.82 $\pm$ 0.04 </td>
   <td style="text-align:left;"> 0.18 $\pm$ 0.04 </td>
  </tr>
 </tbody>
 </table>
 - ⌛ **Plutôt regarder pour introduire un modèle $\delta$-colBiSBM**. 
    - Ajouter le produit par $\delta$ là où nécessaire
    - Ajouter les modèles $\delta$, $\delta\pi, \dots$ et les blocs conditionnels
    - Ajouter les tests unitaires adéquats et les vérifier
 - Regarder *Largest gap* sur réseaux Doré
 - Essayer *clustering* sur `supinfo`
 - Homogénéiser notations dans les supplementaries
 :::{#ref-kmeans-vae}
 - Faire GNN-VAE Doré et sub-Doré avec kmeans et clustering sur l'espace latent
 J'ai commencé à regarder un peu
 :::
 - Creuser et explorer avec easy16s !
 ### Inférence et microbes
 - Lancer *colBiSBM* sur $OTU\times Sample$ &rarr; problème du chargement en mémoire des données à voir
 - Lancer *colSBM* sur $OTU\times OTU$
 - TabNet pratiquer les [exercices](https://github.com/cregouby/Tutoriel_torch)
 - Regarder **SPARTA** Rennes
 - Lire Papiers compositional data (Aitchison et al. intro)
 - Lire article multi-niveaux Saint-Clair
 - Écrire et étudier les modèles pour différents niveaux taxonomiques.
 \begin{align*}
 i \rightarrow &~N^1_i \subseteq N^2_i \subseteq N^3_i & \text{Taxonomie}\\
 Z^0_i \overset{?}{=} & Z^1_i \overset{?}{=}  Z^2_i \overset{?}{=} Z^3_i & \text{Groupes fonctionnels}
 \end{align*}
 #### Causalité
 Plus sur le temps long, à regarder
 - GT causalité
 - Daria Bystrova lire présentation @bystrovaCausalDiscovery (Meek rules, V-structure)
 ## Biblio à faire
 - Regarder le transport optimal pour les graphes bipartis.
 - HDR VB, chapitre de modèle à blocs latents, bcp travaillé sur bipartite
 OT, comparaison clustering, adaptation ARI, *Largest Gap*
 ## Lectures en cours 📚
 ### OT
 - ⌛ @mazeletUnsupervisedLearningOptimal Intéressant pour le transport optimal entre graphes de tailles différentes | Regarder si la régularisation entropique ne marche pas bien pour le graphe.
 - ⌛ @nennaLecture2Entropic Pour comprendre le problème d'OT régularisé par l'entropie.
 - ⌛ @nennaLecture1Monge
 ### Inférence de graphes
 - ⌛ @aitchisonStatisticalAnalysisCompositional1982a, en cours
 - ❗📖 @payneFiniteMixturesMultivariate2023 sur MixMPLN
 ### Causalité
 - ❗📖 @bystrovaCausalDiscovery
 ### Largest Gaps
 - ❗📖 @braultFastConsistentAlgorithm2023
 - ❗📖 @channarondClassificationEstimationStochastic2012 le papier qui introduit le *Largest Gaps*
 ## A discuter
 ### Congés P&S
 ### Thèse
 - Faire préz CSI
 - Faire rapport CSI
 ### Interprétation écologique des résultats de Baldock
 - ⌛ Point avec Elisa, **oui on relance**
 ### Inférence
 - pbs : variance, bcp de zéros, covariables, offset et taxonomie (Réseaux arrêtés à différents niveaux : Genre, OTU ...)
 > Combine networks at different taxonomic levels
 - Inférence + GREMLINS
--- a/suivi/2025-29/references.bib
+++ b/suivi/2025-29/references.bib
@ -0,0 +1,144 @@
@article{mazeletUnsupervisedLearningOptimal,
  title      = {Unsupervised {{Learning}} for {{Optimal Transport}} Plan Prediction between Unbalanced Graphs},
  author     = {Mazelet, Sonia and Flamary, Rémi and Thirion, Bertrand},
  abstract   = {Optimal transport between graphs, based on Gromov-Wasserstein and other extensions, is a powerful tool for comparing and aligning graph structures. However, solving the associated non-convex optimization problems is computationally expensive, which limits the scalability of these methods to large graphs. In this work, we present Unbalanced Learning of Optimal Transport (ULOT), a deep learning method that predicts optimal transport plans between two graphs. Our method is trained by minimizing the fused unbalanced Gromov-Wasserstein (FUGW) loss. We propose a novel neural architecture with cross-attention that is conditioned on the FUGW tradeoff hyperparameters. We evaluate ULOT on synthetic stochastic block model (SBM) graphs and on real cortical surface data obtained from fMRI. ULOT predicts transport plans with competitive loss up to two orders of magnitude faster than classical solvers. Furthermore, the predicted plan can be used as a warm start for classical solvers to accelerate their convergence. Finally, the predicted transport plan is fully differentiable with respect to the graph inputs and FUGW hyperparameters, enabling the optimization of functionals of the ULOT plan.},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-11T09:08:09.864Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/HPZEYMM9/Mazelet et al. - Unsupervised Learning for Optimal Transport plan prediction between unbalanced graphs.pdf}
 }
@article{nennaLecture2Entropic,
  title      = {Lecture 2: {{Entropic Optimal Transport}}},
  author     = {Nenna, Luca},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-11T16:06:28.547Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/WGFIISDB/Nenna - Lecture 2 Entropic Optimal Transport.pdf}
 }
@article{nennaLecture1Monge,
  title      = {Lecture 1 {{Monge}} and {{Kantorovich}} Problems: From Primal to Dual},
  author     = {Nenna, Luca},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-13T09:24:13.832Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/7LVQPD6D/Nenna - Lecture 1 Monge and Kantorovich problems from primal to dual.pdf}
 }
@article{Morton2021.11.09.467939,
  title        = {Scalable Estimation of Microbial Co-Occurrence Networks with {{Variational Autoencoders}}},
  author       = {Morton, James T. and Silverman, Justin and Tikhonov, Gleb and Lähdesmäki, Harri and Bonneau, Rich},
  date         = {2021},
  journaltitle = {bioRxiv : the preprint server for biology},
  shortjournal = {bioRxiv},
  eprint       = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939.full.pdf},
  publisher    = {Cold Spring Harbor Laboratory},
  doi          = {10.1101/2021.11.09.467939},
  url          = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939},
  abstract     = {Estimating microbe-microbe interactions is critical for understanding the ecological laws governing microbial communities. Rapidly decreasing sequencing costs have promised new opportunities to estimate microbe-microbe interactions across thousands of uncultured, unknown microbes. However, typical microbiome datasets are very high dimensional and accurate estimation of microbial correlations requires tens of thousands of samples, exceeding the computational capabilities of existing methodologies. Furthermore, the vast majority of microbiome studies collect compositional metagenomics data which enforces a negative bias when computing microbe-microbe correlations. The Multinomial Logistic Normal (MLN) distribution has been shown to be effective at inferring microbe-microbe correlations, however scalable Bayesian inference of these distributions has remained elusive. Here, we show that carefully constructed Variational Autoencoders (VAEs) augmented with the Isometric Log-ratio (ILR) transform can estimate low-rank MLN distributions thousands of times faster than existing methods. These VAEs can be trained on tens of thousands of samples, enabling co-occurrence inference across tens of thousands of microbes without regularization. The latent embedding distances computed from these VAEs are competitive with existing beta-diversity methods across a variety of mouse and human microbiome classification and regression tasks, with notable improvements on longitudinal studies. Competing Interest Statement: The authors have declared no competing interest.},
  elocation-id = {2021.11.09.467939},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-06-30T14:17:29.518Z}
 }
@article{aitchisonStatisticalAnalysisCompositional1982a,
  title        = {The {{Statistical Analysis}} of {{Compositional Data}}},
  author       = {Aitchison, J.},
  date         = {1982},
  journaltitle = {Journal of the Royal Statistical Society. Series B (Methodological)},
  volume       = {44},
  number       = {2},
  eprint       = {2345821},
  eprinttype   = {jstor},
  pages        = {139--177},
  publisher    = {[Royal Statistical Society, Oxford University Press]},
  issn         = {0035-9246},
  url          = {https://www.jstor.org/stable/2345821},
  urldate      = {2025-05-07},
  abstract     = {The simplex plays an important role as sample space in many practical situations where compositional data, in the form of proportions of some whole, require interpretation. It is argued that the statistical analysis of such data has proved difficult because of a lack both of concepts of independence and of rich enough parametric classes of distributions in the simplex. A variety of independence hypotheses are introduced and interrelated, and new classes of transformed-normal distributions in the simplex are provided as models within which the independence hypotheses can be tested through standard theory of parametric hypothesis testing. The new concepts and statistical methodology are illustrated by a number of applications.},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-05-07T07:43:38.485Z},
  file         = {/home/louis/snap/zotero-snap/common/Zotero/storage/S97URH4Y/Aitchison - 1982 - The Statistical Analysis of Compositional Data.pdf}
 }
@online{payneFiniteMixturesMultivariate2023,
  title       = {Finite {{Mixtures}} of {{Multivariate Poisson-Log Normal Factor Analyzers}} for {{Clustering Count Data}}},
  author      = {Payne, Andrea and Silva, Anjali and Rothstein, Steven J. and McNicholas, Paul D. and Subedi, Sanjeena},
  date        = {2023-11-13},
  eprint      = {2311.07762},
  eprinttype  = {arXiv},
  eprintclass = {stat},
  doi         = {10.48550/arXiv.2311.07762},
  url         = {http://arxiv.org/abs/2311.07762},
  urldate     = {2025-07-02},
  abstract    = {A mixture of multivariate Poisson-log normal factor analyzers is introduced by imposing constraints on the covariance matrix, which resulted in flexible models for clustering purposes. In particular, a class of eight parsimonious mixture models based on the mixtures of factor analyzers model are introduced. Variational Gaussian approximation is used for parameter estimation, and information criteria are used for model selection. The proposed models are explored in the context of clustering discrete data arising from RNA sequencing studies. Using real and simulated data, the models are shown to give favourable clustering performance. The GitHub R package for this work is available at https://github.com/anjalisilva/mixMPLNFA and is released under the open-source MIT license.},
  pubstate    = {prepublished},
  keywords    = {/unread,Statistics - Computation,Statistics - Machine Learning,Statistics - Methodology},
  annotation  = {Read\_Status: New\\
                 Read\_Status\_Date: 2025-07-02T09:31:47.579Z},
  file        = {/home/louis/snap/zotero-snap/common/Zotero/storage/BXVPEIDD/Payne et al. - 2023 - Finite Mixtures of Multivariate Poisson-Log Normal Factor Analyzers for Clustering Count Data.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/L5DAS5C2/2311.html}
 }
@unpublished{bystrovaCausalDiscovery,
  title      = {Causal Discovery},
  author     = {Bystrova, Daria},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-07-02T09:34:39.476Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/NQE5DY92/Bystrova - Causal discovery.pdf}
 }
@online{braultFastConsistentAlgorithm2023,
  title       = {Fast and {{Consistent Algorithm}} for the {{Latent Block Model}}},
  author      = {Brault, Vincent and Channarond, Antoine},
  date        = {2023-03-09},
  eprint      = {1610.09005},
  eprinttype  = {arXiv},
  eprintclass = {math},
  doi         = {10.48550/arXiv.1610.09005},
  url         = {http://arxiv.org/abs/1610.09005},
  urldate     = {2025-07-09},
  abstract    = {The latent block model is used to simultaneously rank the rows and columns of a matrix to reveal a block structure. The algorithms used for estimation are often time consuming. However, recent work shows that the log-likelihood ratios are equivalent under the complete and observed (with unknown labels) models and the groups posterior distribution to converge as the size of the data increases to a Dirac mass located at the actual groups configuration. Based on these observations, the algorithm Largest Gaps is proposed in this paper to perform clustering using only the marginals of the matrix, when the number of blocks is very small with respect to the size of the whole matrix in the case of binary data. In addition, a model selection method is incorporated with a proof of its consistency. Thus, this paper shows that studying simplistic configurations (few blocks compared to the size of the matrix or very contrasting blocks) with complex algorithms is useless since the marginals already give very good parameter and classification estimates.},
  langid      = {english},
  pubstate    = {prepublished},
  keywords    = {/unread,Mathematics - Statistics Theory,Statistics - Computation,Statistics - Statistics Theory},
  annotation  = {Read\_Status: New\\
                 Read\_Status\_Date: 2025-07-09T13:58:53.533Z},
  file        = {/home/louis/snap/zotero-snap/common/Zotero/storage/5LXC6Y68/Brault et Channarond - 2023 - Fast and Consistent Algorithm for the Latent Block Model.pdf}
 }
@article{braultGeneralisationLalgorithmeLargest,
  title      = {Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique},
  author     = {Brault, Vincent and Channarond, Antoine and Robert, Valérie},
  abstract   = {The latent block model assumes there exists a distribution for each crossing between an object cluster and a variable cluster of a data table ; the cells are supposed to be independent conditionally to the choice of these clusters. To estimate the model parameters, most of algorithms are time consuming. Brault and Channarond (2016) proposed to adapt the Largest Gaps algorithm which consists in using the margins. They thus obtained a procedure which estimates all the model parameters consistently but requires a large number of observations. In this talk, we will extend the procedure to the case of any distribution having a second order moment by using an EM algorithm estimation.},
  langid     = {french},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-07-09T12:29:43.098Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/GIUNC4L3/Brault et al. - Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique.pdf}
 }
@article{channarondClassificationEstimationStochastic2012,
  title        = {Classification and Estimation in the {{Stochastic Blockmodel}} Based on the Empirical Degrees},
  author       = {Channarond, Antoine and Daudin, Jean-Jacques and Robin, Stéphane},
  date         = {2012-01-01},
  journaltitle = {Electronic Journal of Statistics},
  shortjournal = {Electron. J. Statist.},
  volume       = {6},
  publisher    = {Institute of Mathematical Statistics},
  issn         = {1935-7524},
  doi          = {10.1214/12-ejs753},
  url          = {https://projecteuclid.org/journals/electronic-journal-of-statistics/volume-6/issue-none/Classification-and-estimation-in-the-Stochastic-Blockmodel-based-on-the/10.1214/12-EJS753.full},
  urldate      = {2025-07-09},
  abstract     = {The Stochastic Blockmodel [16] is a mixture model for heterogeneous network data. Unlike the usual statistical framework, new nodes give additional information about the previous ones in this model. Thereby the distribution of the degrees concentrates in points conditionally on the node class. We show under a mild assumption that classification, estimation and model selection can actually be achieved with no more than the empirical degree data. We provide an algorithm able to process very large networks and consistent estimators based on it. In particular, we prove a bound of the probability of misclassification of at least one node, including when the number of classes grows.},
  issue        = {none},
  langid       = {english},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-07-09T13:59:33.921Z},
  file         = {/home/louis/snap/zotero-snap/common/Zotero/storage/8TL8AJ2G/Channarond et al. - 2012 - Classification and estimation in the Stochastic Blockmodel based on the empirical degrees.pdf}
 }
--- a/suivi/2025-33/2025-33.qmd
+++ b/suivi/2025-33/2025-33.qmd
@ -0,0 +1,158 @@
 ---
 title: "Bilan semaine 33 2025 : 11 août - 15 août"
 categories: [colBiSBM, inférence, GNN]
 date: 2025-08-14
 date-modified: last-modified
 bibliography: references.bib
 ---
 ## TODO List
 - Pour clustering de collections sur données ~~réelles~~ :    
    &rarr; L'intuition de Pierre semble être confirmée, les dissimilarités semblent arrêter de varier sensiblement pour de grandes valeurs $(Q_1,Q_2)$.
    - ❓Je n'arrive plus à reproduire le bug pour l'inférence...
    - 😫 bug encore. S'assurer que ça marche et relancer
 - ⌛ **En fait je donne tous les degrés donc le GNN a juste à retrouver les arêtes non vues**. Revérifier que j'entraîne correctement le VGAE car résultats de généralisation trop bons sur les autres réseaux Doré, ce qui est étonnant
 Pour corriger cet effet :
    - Donner la matrice identité comme features
    - Corriger les degrés calculés.
 - ✅ Ajouter au tableau comparatif sep BiSBM
 - ✅ Pour s'assurer que colBiSBM marche, il faut comparer avec une proportion de :
    - *Missing links*, ie des faux zéros
    - *NA* en *Missing at random (MAR)*
  ![](figs/auc-model.png)
 - Faibles performances de l'inférence :
    - Vérifier que les conditions d'identifiabilité des modèles fautifs sont bien remplies.
    - Récupérer des jeux de paramètres et essayer de reproduire les résultats.
 - Clustering sur Doré :
    - ✅ Désagréger les réseaux et relancer le clustering sur certains auteurs.
    - Regarder pour les couples date+nom les études et le nombre de réseaux analysables (Possible demander à Élisa)
        - ⌛ Chamberlain et al semble intéressant à regarder !
    - Clusteriser sur la base des noms et voir parmi les réseaux Européens (désagrégés ?)
    - Si M > 10, alors voir si je retrouve les mêmes résultats que dans les études.
 - Regarder les codes Mangal database pour $\delta$
 - Voir $\delta$ mais additif
 :::{.callout-note}
 ### $\delta$ additif Bernoulli
 En Bernoulli pas de forme analytique non plus :
 Pour $\alpha_{qr}$:
 $$ \sum_{m=1}^M \sum_{i=1}^{n_1^m} \sum_{j=1}^{n_2^m} \tau_{iq}^{1,m}\tau_{jr}^{2,m}(\frac{X_{ij}^m}{\alpha_{qr}} + \frac{(1-X_{ij}^m)}{\alpha_{qr} + \delta_m -1}) = 0$$
 $$\Leftrightarrow \sum_m \frac{e^m_{qr}}{\alpha_{qr}} + \frac{1}{\alpha_{qr}+\delta_m-1} (n^m_{qr}-e^m_{qr}) = 0$$
 Et pour $\delta_m$:
 $$ \sum_{i=1}^{n_1^m} \sum_{j=1}^{n_2^m} \sum_{q=1}^{Q_1} \sum_{r=1}^{Q_2} \tau_{iq}^{1,m}\tau_{jr}^{2,m}(\frac{X_{ij}^m}{\delta_{m}} + \frac{(1-X_{ij}^m)}{\alpha_{qr} + \delta_m -1}) = 0$$
 :::
 :::{.callout-note}
 ### $\delta$ additif Poisson
 Forme analytique mais risque de confusion ?
 $$\widehat{\delta_m} = \frac{\sum_{q,r} e^m_{qr}}{\sum_{q,r} n^m_{qr}},~\widehat{\alpha_{qr}} = \frac{\sum_{m} e^m_{qr}}{\sum_{m} n^m_{qr}} $$
 :::
 - Regarder la liste des cours du MathSV et de l'Université Paris-Saclay.
 - ⌛ **Plutôt regarder pour introduire un modèle $\delta$-colBiSBM**. 
    - Ajouter le produit par $\delta$ là où nécessaire
    - Ajouter les modèles $\delta$, $\delta\pi, \dots$ et les blocs conditionnels
    - Ajouter les tests unitaires adéquats et les vérifier
 - Regarder *Largest gap* sur réseaux Doré
 - Essayer *clustering* sur `supinfo`
 - Homogénéiser notations dans les supplementaries
 :::{#ref-kmeans-vae}
 - Faire GNN-VAE Doré et sub-Doré avec kmeans et clustering sur l'espace latent
 J'ai commencé à regarder un peu
 :::
 ### Inférence et microbes
 - ✅ Ouvert les données Compendium Europe avec easy16s, premières remarques : en dessous de famille peu d'information
 - easy16s : se renseigner sur 
    - $\alpha$, $\beta$ diversité
    - Heatmap
 - Lancer *colBiSBM* sur $OTU\times Sample$ &rarr; problème du chargement en mémoire des données à voir
 - Lancer *colSBM* sur $OTU\times OTU$
 - TabNet pratiquer les [exercices](https://github.com/cregouby/Tutoriel_torch)
 - Regarder **SPARTA** Rennes
 - Lire Papiers compositional data (Aitchison et al. intro)
 - Lire article multi-niveaux Saint-Clair
 - Ecrire et étudier les modèles pour différents niveaux taxonomiques.
 \begin{align*}
 i \rightarrow &~N^1_i \subseteq N^2_i \subseteq N^3_i & \text{Taxonomie}\\
 Z^0_i \overset{?}{=} & Z^1_i \overset{?}{=}  Z^2_i \overset{?}{=} Z^3_i & \text{Groupes fonctionnels}
 \end{align*}
 #### Causalité
 Plus sur le temps long, à regarder
 - GT causalité
 - Daria Bystrova lire présentation @bystrovaCausalDiscovery (Meek rules, V-structure)
 ## Biblio à faire
 - Regarder Transport optimal graphes bipartis.
 ## Lectures en cours 📚
 ### HDR Vincent Brault
 - ✅ Intro : Présentation de toutes les recherches, très diversifiées et de l'application aux propriétés théoriques en passant par des codes efficients. Creuser le lien entre *les modèles à var latentes et le transport optimal*. Le chap 4 a l'air intéressant notamment le **mélange de modèles de segmentation**.
 - ⌛ Chap 2 : Creuser l'idée de maximiser l'énergie libre, très intéressant regarder le critère CARI et lire Robert et al 2021. Actuellement p32 du manuscrit
 - Chap 3
 ### OT
 - ⌛ @mazeletUnsupervisedLearningOptimal Intéressant pour le transport optimal entre graphes de tailles différentes | Regarder si régularisation entropique ne marche pas bien pour le graphe.
 - ⌛ @nennaLecture2Entropic Pour comprendre le problème d'OT régularisé par l'entropie.
 - ⌛ @nennaLecture1Monge
 ### Inférence de graphes
 - ⌛ @aitchisonStatisticalAnalysisCompositional1982a, en cours
 - ❗📖 @payneFiniteMixturesMultivariate2023 sur MixMPLN
 ### Causalité
 - ❗📖 @bystrovaCausalDiscovery
 ### Largest Gaps
 - ❗📖 @braultFastConsistentAlgorithm2023
 - ❗📖 @channarondClassificationEstimationStochastic2012 le papier qui introduit le *Largest Gaps*
 ## A discuter
 ### Congés P&S
 ### Thèse
 - Faire préz CSI
 - Faire rapport CSI
 ### Interprétation écologique des résultats de Baldock
 - ⌛ Point avec Elisa, **oui on relance**
 ### Inférence
 - pbs : variance, bcp de zéros, covariables, offset et taxonomie (Réseaux arêtes différents niveaux : Genre, OTU ...)
 > Combine networks at different taxonomic levels
 - Inférence + GREMLINS
--- a/suivi/2025-33/figs/auc-model.png
+++ b/suivi/2025-33/figs/auc-model.png
--- a/suivi/2025-33/references.bib
+++ b/suivi/2025-33/references.bib
@ -0,0 +1,144 @@
@article{mazeletUnsupervisedLearningOptimal,
  title      = {Unsupervised {{Learning}} for {{Optimal Transport}} Plan Prediction between Unbalanced Graphs},
  author     = {Mazelet, Sonia and Flamary, Rémi and Thirion, Bertrand},
  abstract   = {Optimal transport between graphs, based on Gromov-Wasserstein and other extensions, is a powerful tool for comparing and aligning graph structures. However, solving the associated non-convex optimization problems is computationally expensive, which limits the scalability of these methods to large graphs. In this work, we present Unbalanced Learning of Optimal Transport (ULOT), a deep learning method that predicts optimal transport plans between two graphs. Our method is trained by minimizing the fused unbalanced Gromov-Wasserstein (FUGW) loss. We propose a novel neural architecture with cross-attention that is conditioned on the FUGW tradeoff hyperparameters. We evaluate ULOT on synthetic stochastic block model (SBM) graphs and on real cortical surface data obtained from fMRI. ULOT predicts transport plans with competitive loss up to two orders of magnitude faster than classical solvers. Furthermore, the predicted plan can be used as a warm start for classical solvers to accelerate their convergence. Finally, the predicted transport plan is fully differentiable with respect to the graph inputs and FUGW hyperparameters, enabling the optimization of functionals of the ULOT plan.},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-11T09:08:09.864Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/HPZEYMM9/Mazelet et al. - Unsupervised Learning for Optimal Transport plan prediction between unbalanced graphs.pdf}
 }
@article{nennaLecture2Entropic,
  title      = {Lecture 2: {{Entropic Optimal Transport}}},
  author     = {Nenna, Luca},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-11T16:06:28.547Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/WGFIISDB/Nenna - Lecture 2 Entropic Optimal Transport.pdf}
 }
@article{nennaLecture1Monge,
  title      = {Lecture 1 {{Monge}} and {{Kantorovich}} Problems: From Primal to Dual},
  author     = {Nenna, Luca},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-13T09:24:13.832Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/7LVQPD6D/Nenna - Lecture 1 Monge and Kantorovich problems from primal to dual.pdf}
 }
@article{Morton2021.11.09.467939,
  title        = {Scalable Estimation of Microbial Co-Occurrence Networks with {{Variational Autoencoders}}},
  author       = {Morton, James T. and Silverman, Justin and Tikhonov, Gleb and Lähdesmäki, Harri and Bonneau, Rich},
  date         = {2021},
  journaltitle = {bioRxiv : the preprint server for biology},
  shortjournal = {bioRxiv},
  eprint       = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939.full.pdf},
  publisher    = {Cold Spring Harbor Laboratory},
  doi          = {10.1101/2021.11.09.467939},
  url          = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939},
  abstract     = {Estimating microbe-microbe interactions is critical for understanding the ecological laws governing microbial communities. Rapidly decreasing sequencing costs have promised new opportunities to estimate microbe-microbe interactions across thousands of uncultured, unknown microbes. However, typical microbiome datasets are very high dimensional and accurate estimation of microbial correlations requires tens of thousands of samples, exceeding the computational capabilities of existing methodologies. Furthermore, the vast majority of microbiome studies collect compositional metagenomics data which enforces a negative bias when computing microbe-microbe correlations. The Multinomial Logistic Normal (MLN) distribution has been shown to be effective at inferring microbe-microbe correlations, however scalable Bayesian inference of these distributions has remained elusive. Here, we show that carefully constructed Variational Autoencoders (VAEs) augmented with the Isometric Log-ratio (ILR) transform can estimate low-rank MLN distributions thousands of times faster than existing methods. These VAEs can be trained on tens of thousands of samples, enabling co-occurrence inference across tens of thousands of microbes without regularization. The latent embedding distances computed from these VAEs are competitive with existing beta-diversity methods across a variety of mouse and human microbiome classification and regression tasks, with notable improvements on longitudinal studies. Competing Interest Statement: The authors have declared no competing interest.},
  elocation-id = {2021.11.09.467939},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-06-30T14:17:29.518Z}
 }
@article{aitchisonStatisticalAnalysisCompositional1982a,
  title        = {The {{Statistical Analysis}} of {{Compositional Data}}},
  author       = {Aitchison, J.},
  date         = {1982},
  journaltitle = {Journal of the Royal Statistical Society. Series B (Methodological)},
  volume       = {44},
  number       = {2},
  eprint       = {2345821},
  eprinttype   = {jstor},
  pages        = {139--177},
  publisher    = {[Royal Statistical Society, Oxford University Press]},
  issn         = {0035-9246},
  url          = {https://www.jstor.org/stable/2345821},
  urldate      = {2025-05-07},
  abstract     = {The simplex plays an important role as sample space in many practical situations where compositional data, in the form of proportions of some whole, require interpretation. It is argued that the statistical analysis of such data has proved difficult because of a lack both of concepts of independence and of rich enough parametric classes of distributions in the simplex. A variety of independence hypotheses are introduced and interrelated, and new classes of transformed-normal distributions in the simplex are provided as models within which the independence hypotheses can be tested through standard theory of parametric hypothesis testing. The new concepts and statistical methodology are illustrated by a number of applications.},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-05-07T07:43:38.485Z},
  file         = {/home/louis/snap/zotero-snap/common/Zotero/storage/S97URH4Y/Aitchison - 1982 - The Statistical Analysis of Compositional Data.pdf}
 }
@online{payneFiniteMixturesMultivariate2023,
  title       = {Finite {{Mixtures}} of {{Multivariate Poisson-Log Normal Factor Analyzers}} for {{Clustering Count Data}}},
  author      = {Payne, Andrea and Silva, Anjali and Rothstein, Steven J. and McNicholas, Paul D. and Subedi, Sanjeena},
  date        = {2023-11-13},
  eprint      = {2311.07762},
  eprinttype  = {arXiv},
  eprintclass = {stat},
  doi         = {10.48550/arXiv.2311.07762},
  url         = {http://arxiv.org/abs/2311.07762},
  urldate     = {2025-07-02},
  abstract    = {A mixture of multivariate Poisson-log normal factor analyzers is introduced by imposing constraints on the covariance matrix, which resulted in flexible models for clustering purposes. In particular, a class of eight parsimonious mixture models based on the mixtures of factor analyzers model are introduced. Variational Gaussian approximation is used for parameter estimation, and information criteria are used for model selection. The proposed models are explored in the context of clustering discrete data arising from RNA sequencing studies. Using real and simulated data, the models are shown to give favourable clustering performance. The GitHub R package for this work is available at https://github.com/anjalisilva/mixMPLNFA and is released under the open-source MIT license.},
  pubstate    = {prepublished},
  keywords    = {/unread,Statistics - Computation,Statistics - Machine Learning,Statistics - Methodology},
  annotation  = {Read\_Status: New\\
                 Read\_Status\_Date: 2025-07-02T09:31:47.579Z},
  file        = {/home/louis/snap/zotero-snap/common/Zotero/storage/BXVPEIDD/Payne et al. - 2023 - Finite Mixtures of Multivariate Poisson-Log Normal Factor Analyzers for Clustering Count Data.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/L5DAS5C2/2311.html}
 }
@unpublished{bystrovaCausalDiscovery,
  title      = {Causal Discovery},
  author     = {Bystrova, Daria},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-07-02T09:34:39.476Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/NQE5DY92/Bystrova - Causal discovery.pdf}
 }
@online{braultFastConsistentAlgorithm2023,
  title       = {Fast and {{Consistent Algorithm}} for the {{Latent Block Model}}},
  author      = {Brault, Vincent and Channarond, Antoine},
  date        = {2023-03-09},
  eprint      = {1610.09005},
  eprinttype  = {arXiv},
  eprintclass = {math},
  doi         = {10.48550/arXiv.1610.09005},
  url         = {http://arxiv.org/abs/1610.09005},
  urldate     = {2025-07-09},
  abstract    = {The latent block model is used to simultaneously rank the rows and columns of a matrix to reveal a block structure. The algorithms used for estimation are often time consuming. However, recent work shows that the log-likelihood ratios are equivalent under the complete and observed (with unknown labels) models and the groups posterior distribution to converge as the size of the data increases to a Dirac mass located at the actual groups configuration. Based on these observations, the algorithm Largest Gaps is proposed in this paper to perform clustering using only the marginals of the matrix, when the number of blocks is very small with respect to the size of the whole matrix in the case of binary data. In addition, a model selection method is incorporated with a proof of its consistency. Thus, this paper shows that studying simplistic configurations (few blocks compared to the size of the matrix or very contrasting blocks) with complex algorithms is useless since the marginals already give very good parameter and classification estimates.},
  langid      = {english},
  pubstate    = {prepublished},
  keywords    = {/unread,Mathematics - Statistics Theory,Statistics - Computation,Statistics - Statistics Theory},
  annotation  = {Read\_Status: New\\
                 Read\_Status\_Date: 2025-07-09T13:58:53.533Z},
  file        = {/home/louis/snap/zotero-snap/common/Zotero/storage/5LXC6Y68/Brault et Channarond - 2023 - Fast and Consistent Algorithm for the Latent Block Model.pdf}
 }
@article{braultGeneralisationLalgorithmeLargest,
  title      = {Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique},
  author     = {Brault, Vincent and Channarond, Antoine and Robert, Valérie},
  abstract   = {The latent block model assumes there exists a distribution for each crossing between an object cluster and a variable cluster of a data table ; the cells are supposed to be independent conditionally to the choice of these clusters. To estimate the model parameters, most of algorithms are time consuming. Brault and Channarond (2016) proposed to adapt the Largest Gaps algorithm which consists in using the margins. They thus obtained a procedure which estimates all the model parameters consistently but requires a large number of observations. In this talk, we will extend the procedure to the case of any distribution having a second order moment by using an EM algorithm estimation.},
  langid     = {french},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-07-09T12:29:43.098Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/GIUNC4L3/Brault et al. - Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique.pdf}
 }
@article{channarondClassificationEstimationStochastic2012,
  title        = {Classification and Estimation in the {{Stochastic Blockmodel}} Based on the Empirical Degrees},
  author       = {Channarond, Antoine and Daudin, Jean-Jacques and Robin, Stéphane},
  date         = {2012-01-01},
  journaltitle = {Electronic Journal of Statistics},
  shortjournal = {Electron. J. Statist.},
  volume       = {6},
  publisher    = {Institute of Mathematical Statistics},
  issn         = {1935-7524},
  doi          = {10.1214/12-ejs753},
  url          = {https://projecteuclid.org/journals/electronic-journal-of-statistics/volume-6/issue-none/Classification-and-estimation-in-the-Stochastic-Blockmodel-based-on-the/10.1214/12-EJS753.full},
  urldate      = {2025-07-09},
  abstract     = {The Stochastic Blockmodel [16] is a mixture model for heterogeneous network data. Unlike the usual statistical framework, new nodes give additional information about the previous ones in this model. Thereby the distribution of the degrees concentrates in points conditionally on the node class. We show under a mild assumption that classification, estimation and model selection can actually be achieved with no more than the empirical degree data. We provide an algorithm able to process very large networks and consistent estimators based on it. In particular, we prove a bound of the probability of misclassification of at least one node, including when the number of classes grows.},
  issue        = {none},
  langid       = {english},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-07-09T13:59:33.921Z},
  file         = {/home/louis/snap/zotero-snap/common/Zotero/storage/8TL8AJ2G/Channarond et al. - 2012 - Classification and estimation in the Stochastic Blockmodel based on the empirical degrees.pdf}
 }
--- a/suivi/2025-35/2025-35.qmd
+++ b/suivi/2025-35/2025-35.qmd
@ -0,0 +1,147 @@
 ---
 title: "Bilan semaine 35 2025 : 25 août - 29 août"
 categories: [colBiSBM, inférence, GNN]
 date: 2025-08-29
 date-modified: last-modified
 bibliography: references.bib
 ---
 ## TODO List
 - Pour clustering de collections sur données ~~réelles~~ :    
    &rarr; L'intuition de Pierre semble être confirmée, les dissimilarités semblent arrêter de varier sensiblement pour de grandes valeurs $(Q_1,Q_2)$.
    - ❓Je n'arrive plus à reproduire le bug pour l'inférence...
    - 😫 bug encore. S'assurer que ça marche et relancer
 - ⌛ A Roscoff avec Julie et Pierre nous avons constaté que c'était l'extraction des dyades pour le calcul des métriques qui était incorrecte. Maintenant c'est corrigé et ça fonctionne ! 
 En fait je donne tous les degrés donc le GNN a juste à retrouver les arêtes non vues. Revérifier que j'entraîne correctement le VGAE car résultats de généralisation trop bons sur les autres réseaux Doré, ce qui est étonnant
 Pour corriger cet effet :
    - Donner la matrice identité comme features
    - Corriger les degrés calculés.
 - Faibles performances de l'inférence :
    - Vérifier que les conditions d'identifiabilité des modèles fautifs sont bien remplies.
    - Récupérer des jeux de paramètres et essayer de reproduire les résultats.
 - Clustering sur Doré :
    - Regarder pour les couples date+nom les études et le nombre de réseaux analysables (Possible demander à Élisa)
        - ⌛ Chamberlain et al semble intéressant à regarder ! Voir le Rmarkdown
    - Clusteriser sur la base des noms et voir parmi les réseaux Européens (désagrégés ?)
    - Si M > 10, alors voir si je retrouve les mêmes résultats que dans les études.
 - Regarder les codes Mangal database pour $\delta$
 - Voir $\delta$ mais additif
 :::{.callout-note}
 ### $\delta$ additif Bernoulli
 En Bernoulli pas de forme analytique non plus :
 Pour $\alpha_{qr}$:
 $$ \sum_{m=1}^M \sum_{i=1}^{n_1^m} \sum_{j=1}^{n_2^m} \tau_{iq}^{1,m}\tau_{jr}^{2,m}(\frac{X_{ij}^m}{\alpha_{qr}} + \frac{(1-X_{ij}^m)}{\alpha_{qr} + \delta_m -1}) = 0$$
 $$\Leftrightarrow \sum_m \frac{e^m_{qr}}{\alpha_{qr}} + \frac{1}{\alpha_{qr}+\delta_m-1} (n^m_{qr}-e^m_{qr}) = 0$$
 Et pour $\delta_m$:
 $$ \sum_{i=1}^{n_1^m} \sum_{j=1}^{n_2^m} \sum_{q=1}^{Q_1} \sum_{r=1}^{Q_2} \tau_{iq}^{1,m}\tau_{jr}^{2,m}(\frac{X_{ij}^m}{\delta_{m}} + \frac{(1-X_{ij}^m)}{\alpha_{qr} + \delta_m -1}) = 0$$
 :::
 :::{.callout-note}
 ### $\delta$ additif Poisson
 Forme analytique mais risque de confusion ?
 $$\widehat{\delta_m} = \frac{\sum_{q,r} e^m_{qr}}{\sum_{q,r} n^m_{qr}},~\widehat{\alpha_{qr}} = \frac{\sum_{m} e^m_{qr}}{\sum_{m} n^m_{qr}} $$
 :::
 - Regarder la liste des cours du MathSV et de l'Université Paris-Saclay.
 - ⌛ **Plutôt regarder pour introduire un modèle $\delta$-colBiSBM**. 
    - Ajouter le produit par $\delta$ là où nécessaire
    - Ajouter les modèles $\delta$, $\delta\pi, \dots$ et les blocs conditionnels
    - Ajouter les tests unitaires adéquats et les vérifier
 - Regarder *Largest gap* sur réseaux Doré
 - Essayer *clustering* sur `supinfo`
 - Homogénéiser notations dans les supplementaries
 :::{#ref-kmeans-vae}
 - Faire GNN-VAE Doré et sub-Doré avec kmeans et clustering sur l'espace latent
 J'ai commencé à regarder un peu
 :::
 ### Inférence et microbes
 - easy16s : se renseigner sur 
    - $\alpha$, $\beta$ diversité
    - Heatmap
    - Voir avec Mahendra à l'occasion du CSI
 - Lancer *colBiSBM* sur $OTU\times Sample$ &rarr; problème du chargement en mémoire des données à voir
 - Lancer *colSBM* sur $OTU\times OTU$
 - TabNet pratiquer les [exercices](https://github.com/cregouby/Tutoriel_torch)
 - Regarder **SPARTA** Rennes
 - Lire Papiers compositional data (Aitchison et al. intro)
 - Lire article multi-niveaux Saint-Clair
 - Ecrire et étudier les modèles pour différents niveaux taxonomiques.
 \begin{align*}
 i \rightarrow &~N^1_i \subseteq N^2_i \subseteq N^3_i & \text{Taxonomie}\\
 Z^0_i \overset{?}{=} & Z^1_i \overset{?}{=}  Z^2_i \overset{?}{=} Z^3_i & \text{Groupes fonctionnels}
 \end{align*}
 #### Causalité
 Plus sur le temps long, à regarder
 - GT causalité
 - Daria Bystrova lire présentation @bystrovaCausalDiscovery (Meek rules, V-structure)
 ## Biblio à faire
 - Regarder Transport optimal graphes bipartis.
 ## Lectures en cours 📚
 ### HDR Vincent Brault
 - ⌛ Chap 2 : Creuser l'idée de maximiser l'énergie libre, très intéressant regarder le critère CARI et lire Robert et al 2021. Actuellement p32 du manuscrit
 - Chap 3
 ### OT
 - ⌛ @mazeletUnsupervisedLearningOptimal Intéressant pour le transport optimal entre graphes de tailles différentes | Regarder si régularisation entropique ne marche pas bien pour le graphe.
 - ⌛ @nennaLecture2Entropic Pour comprendre le problème d'OT régularisé par l'entropie.
 - ⌛ @nennaLecture1Monge
 ### Inférence de graphes
 - ⌛ @aitchisonStatisticalAnalysisCompositional1982a, en cours
 - ❗📖 @payneFiniteMixturesMultivariate2023 sur MixMPLN
 ### Causalité
 - ❗📖 @bystrovaCausalDiscovery
 ### Largest Gaps
 - ❗📖 @braultFastConsistentAlgorithm2023
 - ❗📖 @channarondClassificationEstimationStochastic2012 le papier qui introduit le *Largest Gaps*
 ## A discuter
 ### Congés P&S
 ### Thèse
 - Faire préz CSI
 - Faire rapport CSI
 ### Inférence
 - pbs : variance, bcp de zéros, covariables, offset et taxonomie (Réseaux arêtes différents niveaux : Genre, OTU ...)
 > Combine networks at different taxonomic levels
 - Inférence + GREMLINS
--- a/suivi/2025-35/references.bib
+++ b/suivi/2025-35/references.bib
@ -0,0 +1,144 @@
@article{mazeletUnsupervisedLearningOptimal,
  title      = {Unsupervised {{Learning}} for {{Optimal Transport}} Plan Prediction between Unbalanced Graphs},
  author     = {Mazelet, Sonia and Flamary, Rémi and Thirion, Bertrand},
  abstract   = {Optimal transport between graphs, based on Gromov-Wasserstein and other extensions, is a powerful tool for comparing and aligning graph structures. However, solving the associated non-convex optimization problems is computationally expensive, which limits the scalability of these methods to large graphs. In this work, we present Unbalanced Learning of Optimal Transport (ULOT), a deep learning method that predicts optimal transport plans between two graphs. Our method is trained by minimizing the fused unbalanced Gromov-Wasserstein (FUGW) loss. We propose a novel neural architecture with cross-attention that is conditioned on the FUGW tradeoff hyperparameters. We evaluate ULOT on synthetic stochastic block model (SBM) graphs and on real cortical surface data obtained from fMRI. ULOT predicts transport plans with competitive loss up to two orders of magnitude faster than classical solvers. Furthermore, the predicted plan can be used as a warm start for classical solvers to accelerate their convergence. Finally, the predicted transport plan is fully differentiable with respect to the graph inputs and FUGW hyperparameters, enabling the optimization of functionals of the ULOT plan.},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-11T09:08:09.864Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/HPZEYMM9/Mazelet et al. - Unsupervised Learning for Optimal Transport plan prediction between unbalanced graphs.pdf}
 }
@article{nennaLecture2Entropic,
  title      = {Lecture 2: {{Entropic Optimal Transport}}},
  author     = {Nenna, Luca},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-11T16:06:28.547Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/WGFIISDB/Nenna - Lecture 2 Entropic Optimal Transport.pdf}
 }
@article{nennaLecture1Monge,
  title      = {Lecture 1 {{Monge}} and {{Kantorovich}} Problems: From Primal to Dual},
  author     = {Nenna, Luca},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-13T09:24:13.832Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/7LVQPD6D/Nenna - Lecture 1 Monge and Kantorovich problems from primal to dual.pdf}
 }
@article{Morton2021.11.09.467939,
  title        = {Scalable Estimation of Microbial Co-Occurrence Networks with {{Variational Autoencoders}}},
  author       = {Morton, James T. and Silverman, Justin and Tikhonov, Gleb and Lähdesmäki, Harri and Bonneau, Rich},
  date         = {2021},
  journaltitle = {bioRxiv : the preprint server for biology},
  shortjournal = {bioRxiv},
  eprint       = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939.full.pdf},
  publisher    = {Cold Spring Harbor Laboratory},
  doi          = {10.1101/2021.11.09.467939},
  url          = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939},
  abstract     = {Estimating microbe-microbe interactions is critical for understanding the ecological laws governing microbial communities. Rapidly decreasing sequencing costs have promised new opportunities to estimate microbe-microbe interactions across thousands of uncultured, unknown microbes. However, typical microbiome datasets are very high dimensional and accurate estimation of microbial correlations requires tens of thousands of samples, exceeding the computational capabilities of existing methodologies. Furthermore, the vast majority of microbiome studies collect compositional metagenomics data which enforces a negative bias when computing microbe-microbe correlations. The Multinomial Logistic Normal (MLN) distribution has been shown to be effective at inferring microbe-microbe correlations, however scalable Bayesian inference of these distributions has remained elusive. Here, we show that carefully constructed Variational Autoencoders (VAEs) augmented with the Isometric Log-ratio (ILR) transform can estimate low-rank MLN distributions thousands of times faster than existing methods. These VAEs can be trained on tens of thousands of samples, enabling co-occurrence inference across tens of thousands of microbes without regularization. The latent embedding distances computed from these VAEs are competitive with existing beta-diversity methods across a variety of mouse and human microbiome classification and regression tasks, with notable improvements on longitudinal studies. Competing Interest Statement: The authors have declared no competing interest.},
  elocation-id = {2021.11.09.467939},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-06-30T14:17:29.518Z}
 }
@article{aitchisonStatisticalAnalysisCompositional1982a,
  title        = {The {{Statistical Analysis}} of {{Compositional Data}}},
  author       = {Aitchison, J.},
  date         = {1982},
  journaltitle = {Journal of the Royal Statistical Society. Series B (Methodological)},
  volume       = {44},
  number       = {2},
  eprint       = {2345821},
  eprinttype   = {jstor},
  pages        = {139--177},
  publisher    = {[Royal Statistical Society, Oxford University Press]},
  issn         = {0035-9246},
  url          = {https://www.jstor.org/stable/2345821},
  urldate      = {2025-05-07},
  abstract     = {The simplex plays an important role as sample space in many practical situations where compositional data, in the form of proportions of some whole, require interpretation. It is argued that the statistical analysis of such data has proved difficult because of a lack both of concepts of independence and of rich enough parametric classes of distributions in the simplex. A variety of independence hypotheses are introduced and interrelated, and new classes of transformed-normal distributions in the simplex are provided as models within which the independence hypotheses can be tested through standard theory of parametric hypothesis testing. The new concepts and statistical methodology are illustrated by a number of applications.},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-05-07T07:43:38.485Z},
  file         = {/home/louis/snap/zotero-snap/common/Zotero/storage/S97URH4Y/Aitchison - 1982 - The Statistical Analysis of Compositional Data.pdf}
 }
@online{payneFiniteMixturesMultivariate2023,
  title       = {Finite {{Mixtures}} of {{Multivariate Poisson-Log Normal Factor Analyzers}} for {{Clustering Count Data}}},
  author      = {Payne, Andrea and Silva, Anjali and Rothstein, Steven J. and McNicholas, Paul D. and Subedi, Sanjeena},
  date        = {2023-11-13},
  eprint      = {2311.07762},
  eprinttype  = {arXiv},
  eprintclass = {stat},
  doi         = {10.48550/arXiv.2311.07762},
  url         = {http://arxiv.org/abs/2311.07762},
  urldate     = {2025-07-02},
  abstract    = {A mixture of multivariate Poisson-log normal factor analyzers is introduced by imposing constraints on the covariance matrix, which resulted in flexible models for clustering purposes. In particular, a class of eight parsimonious mixture models based on the mixtures of factor analyzers model are introduced. Variational Gaussian approximation is used for parameter estimation, and information criteria are used for model selection. The proposed models are explored in the context of clustering discrete data arising from RNA sequencing studies. Using real and simulated data, the models are shown to give favourable clustering performance. The GitHub R package for this work is available at https://github.com/anjalisilva/mixMPLNFA and is released under the open-source MIT license.},
  pubstate    = {prepublished},
  keywords    = {/unread,Statistics - Computation,Statistics - Machine Learning,Statistics - Methodology},
  annotation  = {Read\_Status: New\\
                 Read\_Status\_Date: 2025-07-02T09:31:47.579Z},
  file        = {/home/louis/snap/zotero-snap/common/Zotero/storage/BXVPEIDD/Payne et al. - 2023 - Finite Mixtures of Multivariate Poisson-Log Normal Factor Analyzers for Clustering Count Data.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/L5DAS5C2/2311.html}
 }
@unpublished{bystrovaCausalDiscovery,
  title      = {Causal Discovery},
  author     = {Bystrova, Daria},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-07-02T09:34:39.476Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/NQE5DY92/Bystrova - Causal discovery.pdf}
 }
@online{braultFastConsistentAlgorithm2023,
  title       = {Fast and {{Consistent Algorithm}} for the {{Latent Block Model}}},
  author      = {Brault, Vincent and Channarond, Antoine},
  date        = {2023-03-09},
  eprint      = {1610.09005},
  eprinttype  = {arXiv},
  eprintclass = {math},
  doi         = {10.48550/arXiv.1610.09005},
  url         = {http://arxiv.org/abs/1610.09005},
  urldate     = {2025-07-09},
  abstract    = {The latent block model is used to simultaneously rank the rows and columns of a matrix to reveal a block structure. The algorithms used for estimation are often time consuming. However, recent work shows that the log-likelihood ratios are equivalent under the complete and observed (with unknown labels) models and the groups posterior distribution to converge as the size of the data increases to a Dirac mass located at the actual groups configuration. Based on these observations, the algorithm Largest Gaps is proposed in this paper to perform clustering using only the marginals of the matrix, when the number of blocks is very small with respect to the size of the whole matrix in the case of binary data. In addition, a model selection method is incorporated with a proof of its consistency. Thus, this paper shows that studying simplistic configurations (few blocks compared to the size of the matrix or very contrasting blocks) with complex algorithms is useless since the marginals already give very good parameter and classification estimates.},
  langid      = {english},
  pubstate    = {prepublished},
  keywords    = {/unread,Mathematics - Statistics Theory,Statistics - Computation,Statistics - Statistics Theory},
  annotation  = {Read\_Status: New\\
                 Read\_Status\_Date: 2025-07-09T13:58:53.533Z},
  file        = {/home/louis/snap/zotero-snap/common/Zotero/storage/5LXC6Y68/Brault et Channarond - 2023 - Fast and Consistent Algorithm for the Latent Block Model.pdf}
 }
@article{braultGeneralisationLalgorithmeLargest,
  title      = {Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique},
  author     = {Brault, Vincent and Channarond, Antoine and Robert, Valérie},
  abstract   = {The latent block model assumes there exists a distribution for each crossing between an object cluster and a variable cluster of a data table ; the cells are supposed to be independent conditionally to the choice of these clusters. To estimate the model parameters, most of algorithms are time consuming. Brault and Channarond (2016) proposed to adapt the Largest Gaps algorithm which consists in using the margins. They thus obtained a procedure which estimates all the model parameters consistently but requires a large number of observations. In this talk, we will extend the procedure to the case of any distribution having a second order moment by using an EM algorithm estimation.},
  langid     = {french},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-07-09T12:29:43.098Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/GIUNC4L3/Brault et al. - Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique.pdf}
 }
@article{channarondClassificationEstimationStochastic2012,
  title        = {Classification and Estimation in the {{Stochastic Blockmodel}} Based on the Empirical Degrees},
  author       = {Channarond, Antoine and Daudin, Jean-Jacques and Robin, Stéphane},
  date         = {2012-01-01},
  journaltitle = {Electronic Journal of Statistics},
  shortjournal = {Electron. J. Statist.},
  volume       = {6},
  publisher    = {Institute of Mathematical Statistics},
  issn         = {1935-7524},
  doi          = {10.1214/12-ejs753},
  url          = {https://projecteuclid.org/journals/electronic-journal-of-statistics/volume-6/issue-none/Classification-and-estimation-in-the-Stochastic-Blockmodel-based-on-the/10.1214/12-EJS753.full},
  urldate      = {2025-07-09},
  abstract     = {The Stochastic Blockmodel [16] is a mixture model for heterogeneous network data. Unlike the usual statistical framework, new nodes give additional information about the previous ones in this model. Thereby the distribution of the degrees concentrates in points conditionally on the node class. We show under a mild assumption that classification, estimation and model selection can actually be achieved with no more than the empirical degree data. We provide an algorithm able to process very large networks and consistent estimators based on it. In particular, we prove a bound of the probability of misclassification of at least one node, including when the number of classes grows.},
  issue        = {none},
  langid       = {english},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-07-09T13:59:33.921Z},
  file         = {/home/louis/snap/zotero-snap/common/Zotero/storage/8TL8AJ2G/Channarond et al. - 2012 - Classification and estimation in the Stochastic Blockmodel based on the empirical degrees.pdf}
 }
--- a/suivi/2025-38/2025-38.qmd
+++ b/suivi/2025-38/2025-38.qmd
@ -0,0 +1,144 @@
 ---
 title: "Bilan semaine 38 2025 : 15 septembre - 19 septembre"
 categories: [colBiSBM, inférence, GNN]
 date: 2025-09-19
 date-modified: last-modified
 bibliography: references.bib
 ---
 ## TODO List
 - Pour clustering de collections sur données ~~réelles~~ :    
    &rarr; L'intuition de Pierre semble être confirmée, les dissimilarités semblent arrêter de varier sensiblement pour de grandes valeurs $(Q_1,Q_2)$.
    - ❓Je n'arrive plus à reproduire le bug pour l'inférence...
    - 😫 bug encore. S'assurer que ça marche et relancer
 - ⌛ A Roscoff avec Julie et Pierre nous avons constaté que c'était l'extraction des dyades pour le calcul des métriques qui était incorrecte. Maintenant c'est corrigé et ça fonctionne ! 
 En fait je donne tous les degrés donc le GNN a juste à retrouver les arêtes non vues. Revérifier que j'entraîne correctement le VGAE car résultats de généralisation trop bons sur les autres réseaux Doré, ce qui est étonnant
 Pour corriger cet effet :
    - Donner la matrice identité comme features
    - Corriger les degrés calculés.
 - ⚠️ Discuter intersection simulations
 - Clustering sur Doré :
    - Regarder pour les couples date+nom les études et le nombre de réseaux analysables (Possible demander à Élisa)
        - ⌛ Chamberlain et al semble intéressant à regarder ! Voir le Rmarkdown
    - Clusteriser sur la base des noms et voir parmi les réseaux Européens (désagrégés ?)
    - Si M > 10, alors voir si je retrouve les mêmes résultats que dans les études.
 - Regarder les codes Mangal database pour $\delta$
 - Voir $\delta$ mais additif
 :::{.callout-note}
 ### $\delta$ additif Bernoulli
 En Bernoulli pas de forme analytique non plus :
 Pour $\alpha_{qr}$:
 $$ \sum_{m=1}^M \sum_{i=1}^{n_1^m} \sum_{j=1}^{n_2^m} \tau_{iq}^{1,m}\tau_{jr}^{2,m}(\frac{X_{ij}^m}{\alpha_{qr}} + \frac{(1-X_{ij}^m)}{\alpha_{qr} + \delta_m -1}) = 0$$
 $$\Leftrightarrow \sum_m \frac{e^m_{qr}}{\alpha_{qr}} + \frac{1}{\alpha_{qr}+\delta_m-1} (n^m_{qr}-e^m_{qr}) = 0$$
 Et pour $\delta_m$:
 $$ \sum_{i=1}^{n_1^m} \sum_{j=1}^{n_2^m} \sum_{q=1}^{Q_1} \sum_{r=1}^{Q_2} \tau_{iq}^{1,m}\tau_{jr}^{2,m}(\frac{X_{ij}^m}{\delta_{m}} + \frac{(1-X_{ij}^m)}{\alpha_{qr} + \delta_m -1}) = 0$$
 :::
 :::{.callout-note}
 ### $\delta$ additif Poisson
 Forme analytique mais risque de confusion ?
 $$\widehat{\delta_m} = \frac{\sum_{q,r} e^m_{qr}}{\sum_{q,r} n^m_{qr}},~\widehat{\alpha_{qr}} = \frac{\sum_{m} e^m_{qr}}{\sum_{m} n^m_{qr}} $$
 :::
 - Regarder la liste des cours du MathSV et de l'Université Paris-Saclay.
 - ⌛ **Plutôt regarder pour introduire un modèle $\delta$-colBiSBM**. 
    - Ajouter le produit par $\delta$ là où nécessaire
    - Ajouter les modèles $\delta$, $\delta\pi, \dots$ et les blocs conditionnels
    - Ajouter les tests unitaires adéquats et les vérifier
 - Regarder *Largest gap* sur réseaux Doré
 - Essayer *clustering* sur `supinfo`
 - Homogénéiser notations dans les supplementaries
 :::{#ref-kmeans-vae}
 - Faire GNN-VAE Doré et sub-Doré avec kmeans et clustering sur l'espace latent
 J'ai commencé à regarder un peu
 :::
 ### Inférence et microbes
 - easy16s : se renseigner sur 
    - $\alpha$, $\beta$ diversité
    - Heatmap
    - Voir avec Mahendra à l'occasion du CSI
 - Lancer *colBiSBM* sur $OTU\times Sample$ &rarr; problème du chargement en mémoire des données à voir
 - Lancer *colSBM* sur $OTU\times OTU$
 - TabNet pratiquer les [exercices](https://github.com/cregouby/Tutoriel_torch)
 - Regarder **SPARTA** Rennes
 - Lire Papiers compositional data (Aitchison et al. intro)
 - Lire article multi-niveaux Saint-Clair
 - Ecrire et étudier les modèles pour différents niveaux taxonomiques.
 \begin{align*}
 i \rightarrow &~N^1_i \subseteq N^2_i \subseteq N^3_i & \text{Taxonomie}\\
 Z^0_i \overset{?}{=} & Z^1_i \overset{?}{=}  Z^2_i \overset{?}{=} Z^3_i & \text{Groupes fonctionnels}
 \end{align*}
 #### Causalité
 Plus sur le temps long, à regarder
 - GT causalité
 - Daria Bystrova lire présentation @bystrovaCausalDiscovery (Meek rules, V-structure)
 ## Biblio à faire
 - Regarder Transport optimal graphes bipartites.
 ## Lectures en cours 📚
 ### HDR Vincent Brault
 - ⌛ Chap 2 : Creuser l'idée de maximiser l'énergie libre, très intéressant regarder le critère CARI et lire Robert et al 2021. Actuellement p32 du manuscrit
 - Chap 3
 ### OT
 - ⌛ @mazeletUnsupervisedLearningOptimal Intéressant pour le transport optimal entre graphes de tailles différentes | Regarder si la régularisation entropique ne marche pas bien pour le graphe.
 - ⌛ @nennaLecture2Entropic Pour comprendre le problème d'OT régularisé pour l'entropie.
 - ⌛ @nennaLecture1Monge
 ### Inférence de graphes
 - ⌛ @aitchisonStatisticalAnalysisCompositional1982a, en cours
 - ❗📖 @payneFiniteMixturesMultivariate2023 sur MixMPLN
 ### Causalité
 - ❗📖 @bystrovaCausalDiscovery
 ### Largest Gaps
 - ❗📖 @braultFastConsistentAlgorithm2023
 - ❗📖 @channarondClassificationEstimationStochastic2012 le papier qui introduit le *Largest Gaps*
 ## A discuter
 ### Congés P&S
 ### Thèse
 - Faire préz CSI
 - Faire rapport CSI
 ### Inférence
 - pbs : variance, bcp de zéros, covariables, offset et taxonomie (Réseaux arrêtés à différents niveaux : Genre, OTU ...)
 > Combine networks at different taxonomic levels
 - Inférence + GREMLINS
--- a/suivi/2025-38/references.bib
+++ b/suivi/2025-38/references.bib
@ -0,0 +1,144 @@
@article{mazeletUnsupervisedLearningOptimal,
  title      = {Unsupervised {{Learning}} for {{Optimal Transport}} Plan Prediction between Unbalanced Graphs},
  author     = {Mazelet, Sonia and Flamary, Rémi and Thirion, Bertrand},
  abstract   = {Optimal transport between graphs, based on Gromov-Wasserstein and other extensions, is a powerful tool for comparing and aligning graph structures. However, solving the associated non-convex optimization problems is computationally expensive, which limits the scalability of these methods to large graphs. In this work, we present Unbalanced Learning of Optimal Transport (ULOT), a deep learning method that predicts optimal transport plans between two graphs. Our method is trained by minimizing the fused unbalanced Gromov-Wasserstein (FUGW) loss. We propose a novel neural architecture with cross-attention that is conditioned on the FUGW tradeoff hyperparameters. We evaluate ULOT on synthetic stochastic block model (SBM) graphs and on real cortical surface data obtained from fMRI. ULOT predicts transport plans with competitive loss up to two orders of magnitude faster than classical solvers. Furthermore, the predicted plan can be used as a warm start for classical solvers to accelerate their convergence. Finally, the predicted transport plan is fully differentiable with respect to the graph inputs and FUGW hyperparameters, enabling the optimization of functionals of the ULOT plan.},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-11T09:08:09.864Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/HPZEYMM9/Mazelet et al. - Unsupervised Learning for Optimal Transport plan prediction between unbalanced graphs.pdf}
 }
@article{nennaLecture2Entropic,
  title      = {Lecture 2: {{Entropic Optimal Transport}}},
  author     = {Nenna, Luca},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-11T16:06:28.547Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/WGFIISDB/Nenna - Lecture 2 Entropic Optimal Transport.pdf}
 }
@article{nennaLecture1Monge,
  title      = {Lecture 1 {{Monge}} and {{Kantorovich}} Problems: From Primal to Dual},
  author     = {Nenna, Luca},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-13T09:24:13.832Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/7LVQPD6D/Nenna - Lecture 1 Monge and Kantorovich problems from primal to dual.pdf}
 }
@article{Morton2021.11.09.467939,
  title        = {Scalable Estimation of Microbial Co-Occurrence Networks with {{Variational Autoencoders}}},
  author       = {Morton, James T. and Silverman, Justin and Tikhonov, Gleb and Lähdesmäki, Harri and Bonneau, Rich},
  date         = {2021},
  journaltitle = {bioRxiv : the preprint server for biology},
  shortjournal = {bioRxiv},
  eprint       = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939.full.pdf},
  publisher    = {Cold Spring Harbor Laboratory},
  doi          = {10.1101/2021.11.09.467939},
  url          = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939},
  abstract     = {Estimating microbe-microbe interactions is critical for understanding the ecological laws governing microbial communities. Rapidly decreasing sequencing costs have promised new opportunities to estimate microbe-microbe interactions across thousands of uncultured, unknown microbes. However, typical microbiome datasets are very high dimensional and accurate estimation of microbial correlations requires tens of thousands of samples, exceeding the computational capabilities of existing methodologies. Furthermore, the vast majority of microbiome studies collect compositional metagenomics data which enforces a negative bias when computing microbe-microbe correlations. The Multinomial Logistic Normal (MLN) distribution has been shown to be effective at inferring microbe-microbe correlations, however scalable Bayesian inference of these distributions has remained elusive. Here, we show that carefully constructed Variational Autoencoders (VAEs) augmented with the Isometric Log-ratio (ILR) transform can estimate low-rank MLN distributions thousands of times faster than existing methods. These VAEs can be trained on tens of thousands of samples, enabling co-occurrence inference across tens of thousands of microbes without regularization. The latent embedding distances computed from these VAEs are competitive with existing beta-diversity methods across a variety of mouse and human microbiome classification and regression tasks, with notable improvements on longitudinal studies. Competing Interest Statement: The authors have declared no competing interest.},
  elocation-id = {2021.11.09.467939},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-06-30T14:17:29.518Z}
 }
@article{aitchisonStatisticalAnalysisCompositional1982a,
  title        = {The {{Statistical Analysis}} of {{Compositional Data}}},
  author       = {Aitchison, J.},
  date         = {1982},
  journaltitle = {Journal of the Royal Statistical Society. Series B (Methodological)},
  volume       = {44},
  number       = {2},
  eprint       = {2345821},
  eprinttype   = {jstor},
  pages        = {139--177},
  publisher    = {[Royal Statistical Society, Oxford University Press]},
  issn         = {0035-9246},
  url          = {https://www.jstor.org/stable/2345821},
  urldate      = {2025-05-07},
  abstract     = {The simplex plays an important role as sample space in many practical situations where compositional data, in the form of proportions of some whole, require interpretation. It is argued that the statistical analysis of such data has proved difficult because of a lack both of concepts of independence and of rich enough parametric classes of distributions in the simplex. A variety of independence hypotheses are introduced and interrelated, and new classes of transformed-normal distributions in the simplex are provided as models within which the independence hypotheses can be tested through standard theory of parametric hypothesis testing. The new concepts and statistical methodology are illustrated by a number of applications.},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-05-07T07:43:38.485Z},
  file         = {/home/louis/snap/zotero-snap/common/Zotero/storage/S97URH4Y/Aitchison - 1982 - The Statistical Analysis of Compositional Data.pdf}
 }
@online{payneFiniteMixturesMultivariate2023,
  title       = {Finite {{Mixtures}} of {{Multivariate Poisson-Log Normal Factor Analyzers}} for {{Clustering Count Data}}},
  author      = {Payne, Andrea and Silva, Anjali and Rothstein, Steven J. and McNicholas, Paul D. and Subedi, Sanjeena},
  date        = {2023-11-13},
  eprint      = {2311.07762},
  eprinttype  = {arXiv},
  eprintclass = {stat},
  doi         = {10.48550/arXiv.2311.07762},
  url         = {http://arxiv.org/abs/2311.07762},
  urldate     = {2025-07-02},
  abstract    = {A mixture of multivariate Poisson-log normal factor analyzers is introduced by imposing constraints on the covariance matrix, which resulted in flexible models for clustering purposes. In particular, a class of eight parsimonious mixture models based on the mixtures of factor analyzers model are introduced. Variational Gaussian approximation is used for parameter estimation, and information criteria are used for model selection. The proposed models are explored in the context of clustering discrete data arising from RNA sequencing studies. Using real and simulated data, the models are shown to give favourable clustering performance. The GitHub R package for this work is available at https://github.com/anjalisilva/mixMPLNFA and is released under the open-source MIT license.},
  pubstate    = {prepublished},
  keywords    = {/unread,Statistics - Computation,Statistics - Machine Learning,Statistics - Methodology},
  annotation  = {Read\_Status: New\\
                 Read\_Status\_Date: 2025-07-02T09:31:47.579Z},
  file        = {/home/louis/snap/zotero-snap/common/Zotero/storage/BXVPEIDD/Payne et al. - 2023 - Finite Mixtures of Multivariate Poisson-Log Normal Factor Analyzers for Clustering Count Data.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/L5DAS5C2/2311.html}
 }
@unpublished{bystrovaCausalDiscovery,
  title      = {Causal Discovery},
  author     = {Bystrova, Daria},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-07-02T09:34:39.476Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/NQE5DY92/Bystrova - Causal discovery.pdf}
 }
@online{braultFastConsistentAlgorithm2023,
  title       = {Fast and {{Consistent Algorithm}} for the {{Latent Block Model}}},
  author      = {Brault, Vincent and Channarond, Antoine},
  date        = {2023-03-09},
  eprint      = {1610.09005},
  eprinttype  = {arXiv},
  eprintclass = {math},
  doi         = {10.48550/arXiv.1610.09005},
  url         = {http://arxiv.org/abs/1610.09005},
  urldate     = {2025-07-09},
  abstract    = {The latent block model is used to simultaneously rank the rows and columns of a matrix to reveal a block structure. The algorithms used for estimation are often time consuming. However, recent work shows that the log-likelihood ratios are equivalent under the complete and observed (with unknown labels) models and the groups posterior distribution to converge as the size of the data increases to a Dirac mass located at the actual groups configuration. Based on these observations, the algorithm Largest Gaps is proposed in this paper to perform clustering using only the marginals of the matrix, when the number of blocks is very small with respect to the size of the whole matrix in the case of binary data. In addition, a model selection method is incorporated with a proof of its consistency. Thus, this paper shows that studying simplistic configurations (few blocks compared to the size of the matrix or very contrasting blocks) with complex algorithms is useless since the marginals already give very good parameter and classification estimates.},
  langid      = {english},
  pubstate    = {prepublished},
  keywords    = {/unread,Mathematics - Statistics Theory,Statistics - Computation,Statistics - Statistics Theory},
  annotation  = {Read\_Status: New\\
                 Read\_Status\_Date: 2025-07-09T13:58:53.533Z},
  file        = {/home/louis/snap/zotero-snap/common/Zotero/storage/5LXC6Y68/Brault et Channarond - 2023 - Fast and Consistent Algorithm for the Latent Block Model.pdf}
 }
@article{braultGeneralisationLalgorithmeLargest,
  title      = {Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique},
  author     = {Brault, Vincent and Channarond, Antoine and Robert, Valérie},
  abstract   = {The latent block model assumes there exists a distribution for each crossing between an object cluster and a variable cluster of a data table ; the cells are supposed to be independent conditionally to the choice of these clusters. To estimate the model parameters, most of algorithms are time consuming. Brault and Channarond (2016) proposed to adapt the Largest Gaps algorithm which consists in using the margins. They thus obtained a procedure which estimates all the model parameters consistently but requires a large number of observations. In this talk, we will extend the procedure to the case of any distribution having a second order moment by using an EM algorithm estimation.},
  langid     = {french},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-07-09T12:29:43.098Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/GIUNC4L3/Brault et al. - Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique.pdf}
 }
@article{channarondClassificationEstimationStochastic2012,
  title        = {Classification and Estimation in the {{Stochastic Blockmodel}} Based on the Empirical Degrees},
  author       = {Channarond, Antoine and Daudin, Jean-Jacques and Robin, Stéphane},
  date         = {2012-01-01},
  journaltitle = {Electronic Journal of Statistics},
  shortjournal = {Electron. J. Statist.},
  volume       = {6},
  publisher    = {Institute of Mathematical Statistics},
  issn         = {1935-7524},
  doi          = {10.1214/12-ejs753},
  url          = {https://projecteuclid.org/journals/electronic-journal-of-statistics/volume-6/issue-none/Classification-and-estimation-in-the-Stochastic-Blockmodel-based-on-the/10.1214/12-EJS753.full},
  urldate      = {2025-07-09},
  abstract     = {The Stochastic Blockmodel [16] is a mixture model for heterogeneous network data. Unlike the usual statistical framework, new nodes give additional information about the previous ones in this model. Thereby the distribution of the degrees concentrates in points conditionally on the node class. We show under a mild assumption that classification, estimation and model selection can actually be achieved with no more than the empirical degree data. We provide an algorithm able to process very large networks and consistent estimators based on it. In particular, we prove a bound of the probability of misclassification of at least one node, including when the number of classes grows.},
  issue        = {none},
  langid       = {english},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-07-09T13:59:33.921Z},
  file         = {/home/louis/snap/zotero-snap/common/Zotero/storage/8TL8AJ2G/Channarond et al. - 2012 - Classification and estimation in the Stochastic Blockmodel based on the empirical degrees.pdf}
 }
--- a/suivi/2025-43/2025-43.qmd
+++ b/suivi/2025-43/2025-43.qmd
@ -0,0 +1,168 @@
 ---
 title: "Bilan semaine 43 2025 : 20 octobre - 24 octobre"
 categories: [colBiSBM, inférence, GNN]
 date: 2025 10 20
 date-modified: last-modified
 bibliography: references.bib
 ---
 ## TODO List
 - Finir le papier :
    - Re-structurer le plan, mon plan, Donnet et Barbillon, échelle méso et comparaison inter réseau et noeuds non partagés.
    - ✅ Écrire en annexe le BIC-L, faire attention à ajouter l'entropie à la toute fin en mentionnant 
    - ⌛ Fusionner VGAE et information transfer (missing links seulement) donc refaire tourner sur même données qu'en R. A adapter pour Python et pouvoir intégrer dans la figure. (raccourcit).
    - Faire sep-VGAE (seulement sur le réseau avec missing links) et VGAE avec les 4 réseaux.
        En train de reproduire les résultats, AUC stable autour de 0.7
    - Remplacer *Information transfer on simu* par Network partitioning. 
    - ⌛ Écrire le poster avec un titre aguicheur "Are my pollinators your pollinators: ...":
        Commencé contenu à déterminer avec Pierre et Sophie
 - Maitriser graphtools de Peixoto pour essayer d'utiliser l'arbre taxonomique sur graphe de cooccurrence inféré par SparCC
 - Maitriser SparCC
 - Faire LBM sur niveau taxonomique grossier, initialiser avec le résultat pour un niveau plus fin et ainsi de suite.
 - Clustering unipartite j'ai cassé une fonction de distance à vérifier et réparer 
 <!-- - Idée clustering unipartite graphes des métros
 <div class="embed-container">
    <iframe src="https://csun.uic.edu/wp-content/uploads/sites/1080/2023/12/pdf_7.pdf" width=100% height="475px" style="position: relative;">
    </iframe>
 </div> -->
 - Pour clustering de collections sur données ~~réelles~~ :    
    &rarr; L'intuition de Pierre semble être confirmée, les dissimilarités semblent arrêter de varier sensiblement pour de grandes valeurs $(Q_1,Q_2)$.
    - ❓Je n'arrive plus à reproduire le bug pour l'inférence...
    - 😫 bug encore. S'assurer que ça marche et relancer
 - ⌛ A Roscoff avec Julie et Pierre nous avons constaté que c'était l'extraction des dyades pour le calcul des métriques qui était incorrecte. Maintenant c'est corrigé et ça fonctionne ! 
 En fait je donne tous les degrés donc le GNN a juste à retrouver les arêtes non vues. Revérifier que j'entraîne correctement le VGAE car résultats de généralisation trop bons sur les autres réseaux Doré, ce qui est étonnant.
 Pour corriger cet effet :
    - Donner la matrice identité comme features
    - Corriger les degrés calculés.
 - ⚠️ Discuter intersection simulations
 - Clustering sur Doré :
    - Regarder pour les couples date+nom les études et le nombre de réseaux analysables (Possible demander à Élisa)
        - ⌛ Chamberlain et al semble intéressant à regarder ! Voir le Rmarkdown
    - Clusteriser sur la base des noms et voir parmi les réseaux Européens (désagrégés ?)
    - Si M > 10, alors voir si je retrouve les mêmes résultats que dans les études.
 - Regarder les codes Mangal database pour $\delta$
 - Voir $\delta$ mais additif
 :::{.callout-note}
 ### $\delta$ additif Bernoulli
 En Bernoulli pas de forme analytique non plus :
 Pour $\alpha_{qr}$:
 $$ \sum_{m=1}^M \sum_{i=1}^{n_1^m} \sum_{j=1}^{n_2^m} \tau_{iq}^{1,m}\tau_{jr}^{2,m}(\frac{X_{ij}^m}{\alpha_{qr}} + \frac{(1-X_{ij}^m)}{\alpha_{qr} + \delta_m -1}) = 0$$
 $$\Leftrightarrow \sum_m \left( \frac{e^m_{qr}}{\alpha_{qr}} + \frac{n^m_{qr}-e^m_{qr}}{\alpha_{qr}+\delta_m-1} \right) = 0$$
 Et pour $\delta_m$:
 $$ \sum_{i=1}^{n_1^m} \sum_{j=1}^{n_2^m} \sum_{q=1}^{Q_1} \sum_{r=1}^{Q_2} \tau_{iq}^{1,m}\tau_{jr}^{2,m}(\frac{X_{ij}^m}{\delta_{m}} + \frac{(1-X_{ij}^m)}{\alpha_{qr} + \delta_m -1}) = 0$$
 :::
 :::{.callout-note}
 ### $\delta$ additif Poisson
 Forme analytique mais risque de confusion ?
 $$\widehat{\delta_m} = \frac{\sum_{q,r} e^m_{qr}}{\sum_{q,r} n^m_{qr}},~\widehat{\alpha_{qr}} = \frac{\sum_{m} e^m_{qr}}{\sum_{m} n^m_{qr}} $$
 :::
 - Regarder la liste des cours du MathSV et de l'Université Paris-Saclay.
 - ⌛ **Plutôt regarder pour introduire un modèle $\delta$-colBiSBM**. 
    - Ajouter le produit par $\delta$ là où nécessaire
    - Ajouter les modèles $\delta$, $\delta\pi, \dots$ et les blocs conditionnels
    - Ajouter les tests unitaires adéquats et les vérifier
 - Regarder *Largest gap* sur réseaux Doré
 - Essayer *clustering* sur `supinfo`
 - Homogénéiser notations dans les supplementaries
 :::{#ref-kmeans-vae}
 - Faire GNN-VAE Doré et sub-Doré avec kmeans et clustering sur l'espace latent
 J'ai commencé à regarder un peu
 :::
 ### Inférence et microbes
 - easy16s : se renseigner sur 
    - $\alpha$, $\beta$ diversité
    - Heatmap
    - Voir avec Mahendra à l'occasion du CSI
 - Lancer *colBiSBM* sur $OTU\times Sample$ &rarr; problème du chargement en mémoire des données à voir
 - Lancer *colSBM* sur $OTU\times OTU$
 - TabNet pratiquer les [exercices](https://github.com/cregouby/Tutoriel_torch)
 - Regarder **SPARTA** Rennes
 - Lire Papiers compositional data (Aitchison et al. intro)
 - Lire article multi-niveaux Saint-Clair
 - Ecrire et étudier les modèles pour différents niveaux taxonomiques.
 \begin{align*}
 i \rightarrow &~N^1_i \subseteq N^2_i \subseteq N^3_i & \text{Taxonomie}\\
 Z^0_i \overset{?}{=} & Z^1_i \overset{?}{=}  Z^2_i \overset{?}{=} Z^3_i & \text{Groupes fonctionnels}
 \end{align*}
 #### Causalité
 Plus sur le temps long, à regarder
 - GT causalité
 - Daria Bystrova lire présentation @bystrovaCausalDiscovery (Meek rules, V-structure)
 ## Biblio à faire
 - Regarder Transport optimal graphes bipartite.
 ## Lectures en cours 📚
 ### HDR Vincent Brault
 - ⌛ Chap 2 : Creuser l'idée de maximiser l'énergie libre, très intéressant regarder le critère CARI et lire Robert et al 2021. Actuellement p32 du manuscrit
 - Chap 3
 ### OT
 - ⌛ @mazeletUnsupervisedLearningOptimal Intéressant pour le transport optimal entre graphes de tailles différentes | Regarder si régularisation entropique ne marche pas bien pour le graphe.
 - ⌛ @nennaLecture2Entropic Pour comprendre le problème d'OT régularisé pour l'entropie.
 - ⌛ @nennaLecture1Monge
 ### Inférence de graphes
 - ⌛ @aitchisonStatisticalAnalysisCompositional1982a, en cours
 - ❗📖 @payneFiniteMixturesMultivariate2023 sur MixMPLN
 ### Causalité
 - ❗📖 @bystrovaCausalDiscovery
 ### Largest Gaps
 - ❗📖 @braultFastConsistentAlgorithm2023
 - ❗📖 @channarondClassificationEstimationStochastic2012 le papier qui introduit le *Largest Gaps*
 ## A discuter
 ### Congés P&S
 ### Thèse
 - Faire préz CSI
 - Faire rapport CSI
 ### Inférence
 - pbs : variance, bcp de zéros, covariables, offset et taxonomie (Réseaux arêtes différents niveaux : Genre, OTU ...)
 > Combine networks at different taxonomic levels
 - Inférence + GREMLINS
--- a/suivi/2025-43/references.bib
+++ b/suivi/2025-43/references.bib
@ -0,0 +1,144 @@
@article{mazeletUnsupervisedLearningOptimal,
  title      = {Unsupervised {{Learning}} for {{Optimal Transport}} Plan Prediction between Unbalanced Graphs},
  author     = {Mazelet, Sonia and Flamary, Rémi and Thirion, Bertrand},
  abstract   = {Optimal transport between graphs, based on Gromov-Wasserstein and other extensions, is a powerful tool for comparing and aligning graph structures. However, solving the associated non-convex optimization problems is computationally expensive, which limits the scalability of these methods to large graphs. In this work, we present Unbalanced Learning of Optimal Transport (ULOT), a deep learning method that predicts optimal transport plans between two graphs. Our method is trained by minimizing the fused unbalanced Gromov-Wasserstein (FUGW) loss. We propose a novel neural architecture with cross-attention that is conditioned on the FUGW tradeoff hyperparameters. We evaluate ULOT on synthetic stochastic block model (SBM) graphs and on real cortical surface data obtained from fMRI. ULOT predicts transport plans with competitive loss up to two orders of magnitude faster than classical solvers. Furthermore, the predicted plan can be used as a warm start for classical solvers to accelerate their convergence. Finally, the predicted transport plan is fully differentiable with respect to the graph inputs and FUGW hyperparameters, enabling the optimization of functionals of the ULOT plan.},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-11T09:08:09.864Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/HPZEYMM9/Mazelet et al. - Unsupervised Learning for Optimal Transport plan prediction between unbalanced graphs.pdf}
 }
@article{nennaLecture2Entropic,
  title      = {Lecture 2: {{Entropic Optimal Transport}}},
  author     = {Nenna, Luca},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-11T16:06:28.547Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/WGFIISDB/Nenna - Lecture 2 Entropic Optimal Transport.pdf}
 }
@article{nennaLecture1Monge,
  title      = {Lecture 1 {{Monge}} and {{Kantorovich}} Problems: From Primal to Dual},
  author     = {Nenna, Luca},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-13T09:24:13.832Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/7LVQPD6D/Nenna - Lecture 1 Monge and Kantorovich problems from primal to dual.pdf}
 }
@article{Morton2021.11.09.467939,
  title        = {Scalable Estimation of Microbial Co-Occurrence Networks with {{Variational Autoencoders}}},
  author       = {Morton, James T. and Silverman, Justin and Tikhonov, Gleb and Lähdesmäki, Harri and Bonneau, Rich},
  date         = {2021},
  journaltitle = {bioRxiv : the preprint server for biology},
  shortjournal = {bioRxiv},
  eprint       = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939.full.pdf},
  publisher    = {Cold Spring Harbor Laboratory},
  doi          = {10.1101/2021.11.09.467939},
  url          = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939},
  abstract     = {Estimating microbe-microbe interactions is critical for understanding the ecological laws governing microbial communities. Rapidly decreasing sequencing costs have promised new opportunities to estimate microbe-microbe interactions across thousands of uncultured, unknown microbes. However, typical microbiome datasets are very high dimensional and accurate estimation of microbial correlations requires tens of thousands of samples, exceeding the computational capabilities of existing methodologies. Furthermore, the vast majority of microbiome studies collect compositional metagenomics data which enforces a negative bias when computing microbe-microbe correlations. The Multinomial Logistic Normal (MLN) distribution has been shown to be effective at inferring microbe-microbe correlations, however scalable Bayesian inference of these distributions has remained elusive. Here, we show that carefully constructed Variational Autoencoders (VAEs) augmented with the Isometric Log-ratio (ILR) transform can estimate low-rank MLN distributions thousands of times faster than existing methods. These VAEs can be trained on tens of thousands of samples, enabling co-occurrence inference across tens of thousands of microbes without regularization. The latent embedding distances computed from these VAEs are competitive with existing beta-diversity methods across a variety of mouse and human microbiome classification and regression tasks, with notable improvements on longitudinal studies. Competing Interest Statement: The authors have declared no competing interest.},
  elocation-id = {2021.11.09.467939},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-06-30T14:17:29.518Z}
 }
@article{aitchisonStatisticalAnalysisCompositional1982a,
  title        = {The {{Statistical Analysis}} of {{Compositional Data}}},
  author       = {Aitchison, J.},
  date         = {1982},
  journaltitle = {Journal of the Royal Statistical Society. Series B (Methodological)},
  volume       = {44},
  number       = {2},
  eprint       = {2345821},
  eprinttype   = {jstor},
  pages        = {139--177},
  publisher    = {[Royal Statistical Society, Oxford University Press]},
  issn         = {0035-9246},
  url          = {https://www.jstor.org/stable/2345821},
  urldate      = {2025-05-07},
  abstract     = {The simplex plays an important role as sample space in many practical situations where compositional data, in the form of proportions of some whole, require interpretation. It is argued that the statistical analysis of such data has proved difficult because of a lack both of concepts of independence and of rich enough parametric classes of distributions in the simplex. A variety of independence hypotheses are introduced and interrelated, and new classes of transformed-normal distributions in the simplex are provided as models within which the independence hypotheses can be tested through standard theory of parametric hypothesis testing. The new concepts and statistical methodology are illustrated by a number of applications.},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-05-07T07:43:38.485Z},
  file         = {/home/louis/snap/zotero-snap/common/Zotero/storage/S97URH4Y/Aitchison - 1982 - The Statistical Analysis of Compositional Data.pdf}
 }
@online{payneFiniteMixturesMultivariate2023,
  title       = {Finite {{Mixtures}} of {{Multivariate Poisson-Log Normal Factor Analyzers}} for {{Clustering Count Data}}},
  author      = {Payne, Andrea and Silva, Anjali and Rothstein, Steven J. and McNicholas, Paul D. and Subedi, Sanjeena},
  date        = {2023-11-13},
  eprint      = {2311.07762},
  eprinttype  = {arXiv},
  eprintclass = {stat},
  doi         = {10.48550/arXiv.2311.07762},
  url         = {http://arxiv.org/abs/2311.07762},
  urldate     = {2025-07-02},
  abstract    = {A mixture of multivariate Poisson-log normal factor analyzers is introduced by imposing constraints on the covariance matrix, which resulted in flexible models for clustering purposes. In particular, a class of eight parsimonious mixture models based on the mixtures of factor analyzers model are introduced. Variational Gaussian approximation is used for parameter estimation, and information criteria are used for model selection. The proposed models are explored in the context of clustering discrete data arising from RNA sequencing studies. Using real and simulated data, the models are shown to give favourable clustering performance. The GitHub R package for this work is available at https://github.com/anjalisilva/mixMPLNFA and is released under the open-source MIT license.},
  pubstate    = {prepublished},
  keywords    = {/unread,Statistics - Computation,Statistics - Machine Learning,Statistics - Methodology},
  annotation  = {Read\_Status: New\\
                 Read\_Status\_Date: 2025-07-02T09:31:47.579Z},
  file        = {/home/louis/snap/zotero-snap/common/Zotero/storage/BXVPEIDD/Payne et al. - 2023 - Finite Mixtures of Multivariate Poisson-Log Normal Factor Analyzers for Clustering Count Data.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/L5DAS5C2/2311.html}
 }
@unpublished{bystrovaCausalDiscovery,
  title      = {Causal Discovery},
  author     = {Bystrova, Daria},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-07-02T09:34:39.476Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/NQE5DY92/Bystrova - Causal discovery.pdf}
 }
@online{braultFastConsistentAlgorithm2023,
  title       = {Fast and {{Consistent Algorithm}} for the {{Latent Block Model}}},
  author      = {Brault, Vincent and Channarond, Antoine},
  date        = {2023-03-09},
  eprint      = {1610.09005},
  eprinttype  = {arXiv},
  eprintclass = {math},
  doi         = {10.48550/arXiv.1610.09005},
  url         = {http://arxiv.org/abs/1610.09005},
  urldate     = {2025-07-09},
  abstract    = {The latent block model is used to simultaneously rank the rows and columns of a matrix to reveal a block structure. The algorithms used for estimation are often time consuming. However, recent work shows that the log-likelihood ratios are equivalent under the complete and observed (with unknown labels) models and the groups posterior distribution to converge as the size of the data increases to a Dirac mass located at the actual groups configuration. Based on these observations, the algorithm Largest Gaps is proposed in this paper to perform clustering using only the marginals of the matrix, when the number of blocks is very small with respect to the size of the whole matrix in the case of binary data. In addition, a model selection method is incorporated with a proof of its consistency. Thus, this paper shows that studying simplistic configurations (few blocks compared to the size of the matrix or very contrasting blocks) with complex algorithms is useless since the marginals already give very good parameter and classification estimates.},
  langid      = {english},
  pubstate    = {prepublished},
  keywords    = {/unread,Mathematics - Statistics Theory,Statistics - Computation,Statistics - Statistics Theory},
  annotation  = {Read\_Status: New\\
                 Read\_Status\_Date: 2025-07-09T13:58:53.533Z},
  file        = {/home/louis/snap/zotero-snap/common/Zotero/storage/5LXC6Y68/Brault et Channarond - 2023 - Fast and Consistent Algorithm for the Latent Block Model.pdf}
 }
@article{braultGeneralisationLalgorithmeLargest,
  title      = {Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique},
  author     = {Brault, Vincent and Channarond, Antoine and Robert, Valérie},
  abstract   = {The latent block model assumes there exists a distribution for each crossing between an object cluster and a variable cluster of a data table ; the cells are supposed to be independent conditionally to the choice of these clusters. To estimate the model parameters, most of algorithms are time consuming. Brault and Channarond (2016) proposed to adapt the Largest Gaps algorithm which consists in using the margins. They thus obtained a procedure which estimates all the model parameters consistently but requires a large number of observations. In this talk, we will extend the procedure to the case of any distribution having a second order moment by using an EM algorithm estimation.},
  langid     = {french},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-07-09T12:29:43.098Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/GIUNC4L3/Brault et al. - Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique.pdf}
 }
@article{channarondClassificationEstimationStochastic2012,
  title        = {Classification and Estimation in the {{Stochastic Blockmodel}} Based on the Empirical Degrees},
  author       = {Channarond, Antoine and Daudin, Jean-Jacques and Robin, Stéphane},
  date         = {2012-01-01},
  journaltitle = {Electronic Journal of Statistics},
  shortjournal = {Electron. J. Statist.},
  volume       = {6},
  publisher    = {Institute of Mathematical Statistics},
  issn         = {1935-7524},
  doi          = {10.1214/12-ejs753},
  url          = {https://projecteuclid.org/journals/electronic-journal-of-statistics/volume-6/issue-none/Classification-and-estimation-in-the-Stochastic-Blockmodel-based-on-the/10.1214/12-EJS753.full},
  urldate      = {2025-07-09},
  abstract     = {The Stochastic Blockmodel [16] is a mixture model for heterogeneous network data. Unlike the usual statistical framework, new nodes give additional information about the previous ones in this model. Thereby the distribution of the degrees concentrates in points conditionally on the node class. We show under a mild assumption that classification, estimation and model selection can actually be achieved with no more than the empirical degree data. We provide an algorithm able to process very large networks and consistent estimators based on it. In particular, we prove a bound of the probability of misclassification of at least one node, including when the number of classes grows.},
  issue        = {none},
  langid       = {english},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-07-09T13:59:33.921Z},
  file         = {/home/louis/snap/zotero-snap/common/Zotero/storage/8TL8AJ2G/Channarond et al. - 2012 - Classification and estimation in the Stochastic Blockmodel based on the empirical degrees.pdf}
 }
--- a/suivi/2025-44/2025-44.qmd
+++ b/suivi/2025-44/2025-44.qmd
@ -0,0 +1,125 @@
 ---
 title: "Bilan semaine 44 2025 : 27 octobre - 31 octobre"
 categories: [colBiSBM, inférence, GNN]
 date: 2025 10 27
 date-modified: last-modified
 bibliography: references.bib
 ---
 ## TODO List
 - Finir le papier :
    - Re-structurer le plan, mon plan, Donnet et Barbillon, échelle méso et comparaison inter réseau et noeuds non partagés.
    - Partie Baldock: Ajouter l'ordre des modèles préférés
    - Envoyer Info transfer en annexe et remplacer par Network partitioning
    - ✅ Fusionner VGAE et information transfer (missing links seulement) donc refaire tourner sur même données qu'en R. A adapter pour Python et pouvoir intégrer dans la figure. (raccourcit).
    - ✅ Faire sep-VGAE (seulement sur le réseau avec missing links) et VGAE avec les 4 réseaux.
        En train de reproduire les résultats, AUC stable autour de 0.7
    - Remplacer *Information transfer on simu* par Network partitioning. 
    - ✅ Écrire le poster avec un titre aguicheur "Are my pollinators your pollinators: ...":
 - Maitriser graphtools de Peixoto pour essayer d'utiliser l'arbre taxonomique sur graphe de cooccurrence inféré par SparCC
 - Maitriser SparCC
 - Faire LBM sur niveau taxonomique grossier, initialiser avec le résultat pour un niveau plus fin et ainsi de suite.
 - Clustering unipartite j'ai cassé une fonction de distance à vérifier et réparer 
 <!-- - Idée clustering unipartite graphes des métros
 <div class="embed-container">
    <iframe src="https://csun.uic.edu/wp-content/uploads/sites/1080/2023/12/pdf_7.pdf" width=100% height="475px" style="position: relative;">
    </iframe>
 </div> -->
 - Pour clustering de collections sur données ~~réelles~~ :    
    &rarr; L'intuition de Pierre semble être confirmée, les dissimilarités semblent arrêter de varier sensiblement pour de grandes valeurs $(Q_1,Q_2)$.
 - 👶 (délégué à stagiaire) Clustering sur Doré :
    - Regarder pour les couples date+nom les études et le nombre de réseaux analysables (Possible demander à Élisa)
        - ⌛ Chamberlain et al semble intéressant à regarder ! Voir le Rmarkdown
    - Clusteriser sur la base des noms et voir parmi les réseaux Européens (désagrégés ?)
    - Si M > 10, alors voir si je retrouve les mêmes résultats que dans les études.
    - Regarder *Largest gap* sur réseaux Doré
    - Essayer *clustering* sur `supinfo`
 - ✅ Homogénéiser notations dans les supplementaries
 ### Inférence et microbes
 #### Bibliographie: à lire, à faire
 - Lire article multi-niveaux Saint-Clair
 - 🆕⌛ Papier Julie Negative Binomiale
 - 🆕 🔎 Trouver des papiers: 
    - LBM Negative Binomial
    - Network inference through sample comparison
 #### Réflexion
 - easy16s : se renseigner sur 
    - $\alpha$, $\beta$ diversité
    - Heatmap
 - Regarder **SPARTA** Rennes
 - Ecrire et étudier les modèles pour différents niveaux taxonomiques.
 - 🆕 Regarder NetComi
 - 🆕 Regarder OneNet car agrégation plus robuste
 - 🆕 Réfléchir sens d'agréger les données ou de les diviser 
 #### Écrire et faire tourner
 - Lancer *colBiSBM* sur $OTU\times Sample$ &rarr; problème du chargement en mémoire des données à voir
 - Lancer *colSBM* sur $OTU\times OTU$
 - TabNet pratiquer les [exercices](https://github.com/cregouby/Tutoriel_torch)
 - 🆕 SparCC à différents niveaux
 - 🆕 SBM à différents niveaux
 - 🆕⌛ Tree-PLN à différents niveaux
 #### Causalité
 Plus sur le temps long, à regarder
 - GT causalité
 - Daria Bystrova lire présentation @bystrovaCausalDiscovery (Meek rules, V-structure)
 ## A discuter
 - 🆕 Voir pour des Réseaux / GDR où aller
 - 🆕 Chercher des cours à suivre
 ## Biblio à faire
 - Regarder Transport optimal graphes bipartite.
 ## Lectures en cours 📚
 ### HDR Vincent Brault
 - ⌛ Chap 2 : Creuser l'idée de maximiser l'énergie libre, très intéressant regarder le critère CARI et lire Robert et al 2021. Actuellement p32 du manuscrit
 - Chap 3
 ### OT
 - ⌛ @mazeletUnsupervisedLearningOptimal Intéressant pour le transport optimal entre graphes de tailles différentes | Regarder si régularisation entropique ne marche pas bien pour le graphe.
 - ⌛ @nennaLecture2Entropic Pour comprendre le problème d'OT régularisé pour l'entropie.
 - ⌛ @nennaLecture1Monge
 ### Inférence de graphes
 - ⌛ @aitchisonStatisticalAnalysisCompositional1982a, en cours
 - ❗📖 @payneFiniteMixturesMultivariate2023 sur MixMPLN
 ### Causalité
 - ❗📖 @bystrovaCausalDiscovery
 ### Largest Gaps
 - ❗📖 @braultFastConsistentAlgorithm2023
 - ❗📖 @channarondClassificationEstimationStochastic2012 le papier qui introduit le *Largest Gaps*
--- a/suivi/2025-44/references.bib
+++ b/suivi/2025-44/references.bib
@ -0,0 +1,144 @@
@article{mazeletUnsupervisedLearningOptimal,
  title      = {Unsupervised {{Learning}} for {{Optimal Transport}} Plan Prediction between Unbalanced Graphs},
  author     = {Mazelet, Sonia and Flamary, Rémi and Thirion, Bertrand},
  abstract   = {Optimal transport between graphs, based on Gromov-Wasserstein and other extensions, is a powerful tool for comparing and aligning graph structures. However, solving the associated non-convex optimization problems is computationally expensive, which limits the scalability of these methods to large graphs. In this work, we present Unbalanced Learning of Optimal Transport (ULOT), a deep learning method that predicts optimal transport plans between two graphs. Our method is trained by minimizing the fused unbalanced Gromov-Wasserstein (FUGW) loss. We propose a novel neural architecture with cross-attention that is conditioned on the FUGW tradeoff hyperparameters. We evaluate ULOT on synthetic stochastic block model (SBM) graphs and on real cortical surface data obtained from fMRI. ULOT predicts transport plans with competitive loss up to two orders of magnitude faster than classical solvers. Furthermore, the predicted plan can be used as a warm start for classical solvers to accelerate their convergence. Finally, the predicted transport plan is fully differentiable with respect to the graph inputs and FUGW hyperparameters, enabling the optimization of functionals of the ULOT plan.},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-11T09:08:09.864Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/HPZEYMM9/Mazelet et al. - Unsupervised Learning for Optimal Transport plan prediction between unbalanced graphs.pdf}
 }
@article{nennaLecture2Entropic,
  title      = {Lecture 2: {{Entropic Optimal Transport}}},
  author     = {Nenna, Luca},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-11T16:06:28.547Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/WGFIISDB/Nenna - Lecture 2 Entropic Optimal Transport.pdf}
 }
@article{nennaLecture1Monge,
  title      = {Lecture 1 {{Monge}} and {{Kantorovich}} Problems: From Primal to Dual},
  author     = {Nenna, Luca},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-13T09:24:13.832Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/7LVQPD6D/Nenna - Lecture 1 Monge and Kantorovich problems from primal to dual.pdf}
 }
@article{Morton2021.11.09.467939,
  title        = {Scalable Estimation of Microbial Co-Occurrence Networks with {{Variational Autoencoders}}},
  author       = {Morton, James T. and Silverman, Justin and Tikhonov, Gleb and Lähdesmäki, Harri and Bonneau, Rich},
  date         = {2021},
  journaltitle = {bioRxiv : the preprint server for biology},
  shortjournal = {bioRxiv},
  eprint       = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939.full.pdf},
  publisher    = {Cold Spring Harbor Laboratory},
  doi          = {10.1101/2021.11.09.467939},
  url          = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939},
  abstract     = {Estimating microbe-microbe interactions is critical for understanding the ecological laws governing microbial communities. Rapidly decreasing sequencing costs have promised new opportunities to estimate microbe-microbe interactions across thousands of uncultured, unknown microbes. However, typical microbiome datasets are very high dimensional and accurate estimation of microbial correlations requires tens of thousands of samples, exceeding the computational capabilities of existing methodologies. Furthermore, the vast majority of microbiome studies collect compositional metagenomics data which enforces a negative bias when computing microbe-microbe correlations. The Multinomial Logistic Normal (MLN) distribution has been shown to be effective at inferring microbe-microbe correlations, however scalable Bayesian inference of these distributions has remained elusive. Here, we show that carefully constructed Variational Autoencoders (VAEs) augmented with the Isometric Log-ratio (ILR) transform can estimate low-rank MLN distributions thousands of times faster than existing methods. These VAEs can be trained on tens of thousands of samples, enabling co-occurrence inference across tens of thousands of microbes without regularization. The latent embedding distances computed from these VAEs are competitive with existing beta-diversity methods across a variety of mouse and human microbiome classification and regression tasks, with notable improvements on longitudinal studies. Competing Interest Statement: The authors have declared no competing interest.},
  elocation-id = {2021.11.09.467939},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-06-30T14:17:29.518Z}
 }
@article{aitchisonStatisticalAnalysisCompositional1982a,
  title        = {The {{Statistical Analysis}} of {{Compositional Data}}},
  author       = {Aitchison, J.},
  date         = {1982},
  journaltitle = {Journal of the Royal Statistical Society. Series B (Methodological)},
  volume       = {44},
  number       = {2},
  eprint       = {2345821},
  eprinttype   = {jstor},
  pages        = {139--177},
  publisher    = {[Royal Statistical Society, Oxford University Press]},
  issn         = {0035-9246},
  url          = {https://www.jstor.org/stable/2345821},
  urldate      = {2025-05-07},
  abstract     = {The simplex plays an important role as sample space in many practical situations where compositional data, in the form of proportions of some whole, require interpretation. It is argued that the statistical analysis of such data has proved difficult because of a lack both of concepts of independence and of rich enough parametric classes of distributions in the simplex. A variety of independence hypotheses are introduced and interrelated, and new classes of transformed-normal distributions in the simplex are provided as models within which the independence hypotheses can be tested through standard theory of parametric hypothesis testing. The new concepts and statistical methodology are illustrated by a number of applications.},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-05-07T07:43:38.485Z},
  file         = {/home/louis/snap/zotero-snap/common/Zotero/storage/S97URH4Y/Aitchison - 1982 - The Statistical Analysis of Compositional Data.pdf}
 }
@online{payneFiniteMixturesMultivariate2023,
  title       = {Finite {{Mixtures}} of {{Multivariate Poisson-Log Normal Factor Analyzers}} for {{Clustering Count Data}}},
  author      = {Payne, Andrea and Silva, Anjali and Rothstein, Steven J. and McNicholas, Paul D. and Subedi, Sanjeena},
  date        = {2023-11-13},
  eprint      = {2311.07762},
  eprinttype  = {arXiv},
  eprintclass = {stat},
  doi         = {10.48550/arXiv.2311.07762},
  url         = {http://arxiv.org/abs/2311.07762},
  urldate     = {2025-07-02},
  abstract    = {A mixture of multivariate Poisson-log normal factor analyzers is introduced by imposing constraints on the covariance matrix, which resulted in flexible models for clustering purposes. In particular, a class of eight parsimonious mixture models based on the mixtures of factor analyzers model are introduced. Variational Gaussian approximation is used for parameter estimation, and information criteria are used for model selection. The proposed models are explored in the context of clustering discrete data arising from RNA sequencing studies. Using real and simulated data, the models are shown to give favourable clustering performance. The GitHub R package for this work is available at https://github.com/anjalisilva/mixMPLNFA and is released under the open-source MIT license.},
  pubstate    = {prepublished},
  keywords    = {/unread,Statistics - Computation,Statistics - Machine Learning,Statistics - Methodology},
  annotation  = {Read\_Status: New\\
                 Read\_Status\_Date: 2025-07-02T09:31:47.579Z},
  file        = {/home/louis/snap/zotero-snap/common/Zotero/storage/BXVPEIDD/Payne et al. - 2023 - Finite Mixtures of Multivariate Poisson-Log Normal Factor Analyzers for Clustering Count Data.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/L5DAS5C2/2311.html}
 }
@unpublished{bystrovaCausalDiscovery,
  title      = {Causal Discovery},
  author     = {Bystrova, Daria},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-07-02T09:34:39.476Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/NQE5DY92/Bystrova - Causal discovery.pdf}
 }
@online{braultFastConsistentAlgorithm2023,
  title       = {Fast and {{Consistent Algorithm}} for the {{Latent Block Model}}},
  author      = {Brault, Vincent and Channarond, Antoine},
  date        = {2023-03-09},
  eprint      = {1610.09005},
  eprinttype  = {arXiv},
  eprintclass = {math},
  doi         = {10.48550/arXiv.1610.09005},
  url         = {http://arxiv.org/abs/1610.09005},
  urldate     = {2025-07-09},
  abstract    = {The latent block model is used to simultaneously rank the rows and columns of a matrix to reveal a block structure. The algorithms used for estimation are often time consuming. However, recent work shows that the log-likelihood ratios are equivalent under the complete and observed (with unknown labels) models and the groups posterior distribution to converge as the size of the data increases to a Dirac mass located at the actual groups configuration. Based on these observations, the algorithm Largest Gaps is proposed in this paper to perform clustering using only the marginals of the matrix, when the number of blocks is very small with respect to the size of the whole matrix in the case of binary data. In addition, a model selection method is incorporated with a proof of its consistency. Thus, this paper shows that studying simplistic configurations (few blocks compared to the size of the matrix or very contrasting blocks) with complex algorithms is useless since the marginals already give very good parameter and classification estimates.},
  langid      = {english},
  pubstate    = {prepublished},
  keywords    = {/unread,Mathematics - Statistics Theory,Statistics - Computation,Statistics - Statistics Theory},
  annotation  = {Read\_Status: New\\
                 Read\_Status\_Date: 2025-07-09T13:58:53.533Z},
  file        = {/home/louis/snap/zotero-snap/common/Zotero/storage/5LXC6Y68/Brault et Channarond - 2023 - Fast and Consistent Algorithm for the Latent Block Model.pdf}
 }
@article{braultGeneralisationLalgorithmeLargest,
  title      = {Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique},
  author     = {Brault, Vincent and Channarond, Antoine and Robert, Valérie},
  abstract   = {The latent block model assumes there exists a distribution for each crossing between an object cluster and a variable cluster of a data table ; the cells are supposed to be independent conditionally to the choice of these clusters. To estimate the model parameters, most of algorithms are time consuming. Brault and Channarond (2016) proposed to adapt the Largest Gaps algorithm which consists in using the margins. They thus obtained a procedure which estimates all the model parameters consistently but requires a large number of observations. In this talk, we will extend the procedure to the case of any distribution having a second order moment by using an EM algorithm estimation.},
  langid     = {french},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-07-09T12:29:43.098Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/GIUNC4L3/Brault et al. - Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique.pdf}
 }
@article{channarondClassificationEstimationStochastic2012,
  title        = {Classification and Estimation in the {{Stochastic Blockmodel}} Based on the Empirical Degrees},
  author       = {Channarond, Antoine and Daudin, Jean-Jacques and Robin, Stéphane},
  date         = {2012-01-01},
  journaltitle = {Electronic Journal of Statistics},
  shortjournal = {Electron. J. Statist.},
  volume       = {6},
  publisher    = {Institute of Mathematical Statistics},
  issn         = {1935-7524},
  doi          = {10.1214/12-ejs753},
  url          = {https://projecteuclid.org/journals/electronic-journal-of-statistics/volume-6/issue-none/Classification-and-estimation-in-the-Stochastic-Blockmodel-based-on-the/10.1214/12-EJS753.full},
  urldate      = {2025-07-09},
  abstract     = {The Stochastic Blockmodel [16] is a mixture model for heterogeneous network data. Unlike the usual statistical framework, new nodes give additional information about the previous ones in this model. Thereby the distribution of the degrees concentrates in points conditionally on the node class. We show under a mild assumption that classification, estimation and model selection can actually be achieved with no more than the empirical degree data. We provide an algorithm able to process very large networks and consistent estimators based on it. In particular, we prove a bound of the probability of misclassification of at least one node, including when the number of classes grows.},
  issue        = {none},
  langid       = {english},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-07-09T13:59:33.921Z},
  file         = {/home/louis/snap/zotero-snap/common/Zotero/storage/8TL8AJ2G/Channarond et al. - 2012 - Classification and estimation in the Stochastic Blockmodel based on the empirical degrees.pdf}
 }
--- a/suivi/2025-45/2025-45.qmd
+++ b/suivi/2025-45/2025-45.qmd
@ -0,0 +1,125 @@
 ---
 title: "Bilan semaine 45 2025 : 03 novembre - 06 novembre"
 categories: [colBiSBM, inférence, GNN]
 date: 2025 11 03
 date-modified: last-modified
 bibliography: references.bib
 ---
 ## TODO List
 - Finir le papier :
    - ❓ Fait ? Re-structurer le plan, mon plan, Donnet et Barbillon, échelle méso et comparaison inter réseau et noeuds non partagés.
    - ✅ Partie Baldock: Ajouter l'ordre des modèles préférés et vérifier mais BICLsep < BICL pirho < BICL iid 
    - ✅ Toutes les simus en annexe. Envoyer Info transfer en annexe et remplacer par Network partitioning
 - Codes pour le papier :
    - Nettoyer les scripts
    - Faire un joli README
    - ❓Faire des notebooks
 - Maitriser graphtools de Peixoto pour essayer d'utiliser l'arbre taxonomique sur le graphe de cooccurrence inféré par SparCC
 - Maitriser SparCC
 - Faire LBM sur niveau taxonomique grossier, initialiser avec le résultat pour un niveau plus fin et ainsi de suite.
 - Clustering unipartite j'ai cassé une fonction de distance à vérifier et réparer 
 - Pour clustering de collections sur données ~~réelles~~ :    
    &rarr; L'intuition de Pierre semble être confirmée, les dissimilarités semblent arrêter de varier sensiblement pour de grandes valeurs $(Q_1,Q_2)$.
 - 👶 (délégué à Mona) Clustering sur Doré :
    - Regarder pour les couples date+nom les études et le nombre de réseaux analysables (Possible demander à Élisa)
        - ⌛ Chamberlain et al semble intéressant à regarder ! Voir le Rmarkdown
    - Clusteriser sur la base des noms et voir parmi les réseaux Européens (désagrégés ?)
    - Si M > 10, alors voir si je retrouve les mêmes résultats que dans les études.
    - Regarder *Largest gap* sur réseaux Doré
    - ⌛ Essayer *clustering* sur `supinfo`
        - CAH et Kmeans tendent à former $K = 13$ clusters sur les supinfos
        - Enrichir avec des métriques sur les réseaux (nestedness, connectance autres ?)
        - Demander à Elisa pour la signification des métadonnées 
        - Demander à Elisa, une fois la cohérence des groupes vérifiée, de voir pour une interprétation écologique ?
        - Algo de clustering sur les groupes trouvés
 ### Inférence et microbes
 #### Bibliographie: à lire, à faire
 - Lire article multi-niveaux Saint-Clair
 - ✅ Papier Julie Negative Binomiale
 - 🆕 🔎 Trouver des papiers: 
    - LBM Negative Binomial
    - Network inference through sample comparison
 - Idée des groupes sur la base de distance phylogénétique:
    - En train de comprendre les distances que phyloseq permet de calculer sur notre exemple
    - En train de lire sur Principal coordinate analysis : https://openplantpathology.github.io/OPP_Workshop_Multivariate/2-MV_PCO.html
    - Parametric t-SNE pour avoir une unique représentation latente (inconvénient utilise du Deep Learning)
    - Lire Papier UniFrac
 #### Réflexion
 - easy16s : se renseigner sur 
    - $\alpha$, $\beta$ diversité
    - Heatmap
 - Regarder **SPARTA** Rennes
 - Ecrire et étudier les modèles pour différents niveaux taxonomiques.
 - 🆕 Regarder NetComi
 - 🆕 Regarder OneNet car agrégation plus robuste
 - 🆕 Réfléchir au sens d'agréger les données ou de les diviser 
 #### Écrire et faire tourner
 - Lancer *colBiSBM* sur $OTU\times Sample$ &rarr; problème du chargement en mémoire des données à voir
 - Lancer *colSBM* sur $OTU\times OTU$
 - TabNet pratiquer les [exercices](https://github.com/cregouby/Tutoriel_torch)
 - 🆕 SparCC à différents niveaux
 - 🆕 SBM à différents niveaux
 - 🆕⌛ Tree-PLN à différents niveaux
 #### Causalité
 Plus sur le temps long, à regarder
 - GT causalité
 - Daria Bystrova lire présentation @bystrovaCausalDiscovery (Meek rules, V-structure)
 ## A discuter
 - 🆕 Voir pour des Réseaux / GDR où aller
 - 🆕 Chercher des cours à suivre
 ## Biblio à faire
 - Regarder Transport optimal graphes bipartis.
 ## Lectures en cours 📚
 ### HDR Vincent Brault
 - ⌛ Chap 2 : Creuser l'idée de maximiser l'énergie libre, très intéressant regarder le critère CARI et lire Robert et al 2021. Actuellement p32 du manuscrit
 - Chap 3
 ### OT
 - ⌛ @mazeletUnsupervisedLearningOptimal Intéressant pour le transport optimal entre graphes de tailles différentes | Regarder si la régularisation entropique ne marche pas bien pour le graphe.
 - ⌛ @nennaLecture2Entropic Pour comprendre le problème d'OT régularisé par l'entropie.
 - ⌛ @nennaLecture1Monge
 ### Inférence de graphes
 - ⌛ @aitchisonStatisticalAnalysisCompositional1982a, en cours
 - ❗📖 @payneFiniteMixturesMultivariate2023 sur MixMPLN
 ### Causalité
 - ❗📖 @bystrovaCausalDiscovery
 ### Largest Gaps
 - ❗📖 @braultFastConsistentAlgorithm2023
 - ❗📖 @channarondClassificationEstimationStochastic2012 le papier qui introduit le *Largest Gaps*
--- a/suivi/2025-45/references.bib
+++ b/suivi/2025-45/references.bib
@ -0,0 +1,144 @@
@article{mazeletUnsupervisedLearningOptimal,
  title      = {Unsupervised {{Learning}} for {{Optimal Transport}} Plan Prediction between Unbalanced Graphs},
  author     = {Mazelet, Sonia and Flamary, Rémi and Thirion, Bertrand},
  abstract   = {Optimal transport between graphs, based on Gromov-Wasserstein and other extensions, is a powerful tool for comparing and aligning graph structures. However, solving the associated non-convex optimization problems is computationally expensive, which limits the scalability of these methods to large graphs. In this work, we present Unbalanced Learning of Optimal Transport (ULOT), a deep learning method that predicts optimal transport plans between two graphs. Our method is trained by minimizing the fused unbalanced Gromov-Wasserstein (FUGW) loss. We propose a novel neural architecture with cross-attention that is conditioned on the FUGW tradeoff hyperparameters. We evaluate ULOT on synthetic stochastic block model (SBM) graphs and on real cortical surface data obtained from fMRI. ULOT predicts transport plans with competitive loss up to two orders of magnitude faster than classical solvers. Furthermore, the predicted plan can be used as a warm start for classical solvers to accelerate their convergence. Finally, the predicted transport plan is fully differentiable with respect to the graph inputs and FUGW hyperparameters, enabling the optimization of functionals of the ULOT plan.},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-11T09:08:09.864Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/HPZEYMM9/Mazelet et al. - Unsupervised Learning for Optimal Transport plan prediction between unbalanced graphs.pdf}
 }
@article{nennaLecture2Entropic,
  title      = {Lecture 2: {{Entropic Optimal Transport}}},
  author     = {Nenna, Luca},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-11T16:06:28.547Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/WGFIISDB/Nenna - Lecture 2 Entropic Optimal Transport.pdf}
 }
@article{nennaLecture1Monge,
  title      = {Lecture 1 {{Monge}} and {{Kantorovich}} Problems: From Primal to Dual},
  author     = {Nenna, Luca},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-13T09:24:13.832Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/7LVQPD6D/Nenna - Lecture 1 Monge and Kantorovich problems from primal to dual.pdf}
 }
@article{Morton2021.11.09.467939,
  title        = {Scalable Estimation of Microbial Co-Occurrence Networks with {{Variational Autoencoders}}},
  author       = {Morton, James T. and Silverman, Justin and Tikhonov, Gleb and Lähdesmäki, Harri and Bonneau, Rich},
  date         = {2021},
  journaltitle = {bioRxiv : the preprint server for biology},
  shortjournal = {bioRxiv},
  eprint       = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939.full.pdf},
  publisher    = {Cold Spring Harbor Laboratory},
  doi          = {10.1101/2021.11.09.467939},
  url          = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939},
  abstract     = {Estimating microbe-microbe interactions is critical for understanding the ecological laws governing microbial communities. Rapidly decreasing sequencing costs have promised new opportunities to estimate microbe-microbe interactions across thousands of uncultured, unknown microbes. However, typical microbiome datasets are very high dimensional and accurate estimation of microbial correlations requires tens of thousands of samples, exceeding the computational capabilities of existing methodologies. Furthermore, the vast majority of microbiome studies collect compositional metagenomics data which enforces a negative bias when computing microbe-microbe correlations. The Multinomial Logistic Normal (MLN) distribution has been shown to be effective at inferring microbe-microbe correlations, however scalable Bayesian inference of these distributions has remained elusive. Here, we show that carefully constructed Variational Autoencoders (VAEs) augmented with the Isometric Log-ratio (ILR) transform can estimate low-rank MLN distributions thousands of times faster than existing methods. These VAEs can be trained on tens of thousands of samples, enabling co-occurrence inference across tens of thousands of microbes without regularization. The latent embedding distances computed from these VAEs are competitive with existing beta-diversity methods across a variety of mouse and human microbiome classification and regression tasks, with notable improvements on longitudinal studies. Competing Interest Statement: The authors have declared no competing interest.},
  elocation-id = {2021.11.09.467939},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-06-30T14:17:29.518Z}
 }
@article{aitchisonStatisticalAnalysisCompositional1982a,
  title        = {The {{Statistical Analysis}} of {{Compositional Data}}},
  author       = {Aitchison, J.},
  date         = {1982},
  journaltitle = {Journal of the Royal Statistical Society. Series B (Methodological)},
  volume       = {44},
  number       = {2},
  eprint       = {2345821},
  eprinttype   = {jstor},
  pages        = {139--177},
  publisher    = {[Royal Statistical Society, Oxford University Press]},
  issn         = {0035-9246},
  url          = {https://www.jstor.org/stable/2345821},
  urldate      = {2025-05-07},
  abstract     = {The simplex plays an important role as sample space in many practical situations where compositional data, in the form of proportions of some whole, require interpretation. It is argued that the statistical analysis of such data has proved difficult because of a lack both of concepts of independence and of rich enough parametric classes of distributions in the simplex. A variety of independence hypotheses are introduced and interrelated, and new classes of transformed-normal distributions in the simplex are provided as models within which the independence hypotheses can be tested through standard theory of parametric hypothesis testing. The new concepts and statistical methodology are illustrated by a number of applications.},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-05-07T07:43:38.485Z},
  file         = {/home/louis/snap/zotero-snap/common/Zotero/storage/S97URH4Y/Aitchison - 1982 - The Statistical Analysis of Compositional Data.pdf}
 }
@online{payneFiniteMixturesMultivariate2023,
  title       = {Finite {{Mixtures}} of {{Multivariate Poisson-Log Normal Factor Analyzers}} for {{Clustering Count Data}}},
  author      = {Payne, Andrea and Silva, Anjali and Rothstein, Steven J. and McNicholas, Paul D. and Subedi, Sanjeena},
  date        = {2023-11-13},
  eprint      = {2311.07762},
  eprinttype  = {arXiv},
  eprintclass = {stat},
  doi         = {10.48550/arXiv.2311.07762},
  url         = {http://arxiv.org/abs/2311.07762},
  urldate     = {2025-07-02},
  abstract    = {A mixture of multivariate Poisson-log normal factor analyzers is introduced by imposing constraints on the covariance matrix, which resulted in flexible models for clustering purposes. In particular, a class of eight parsimonious mixture models based on the mixtures of factor analyzers model are introduced. Variational Gaussian approximation is used for parameter estimation, and information criteria are used for model selection. The proposed models are explored in the context of clustering discrete data arising from RNA sequencing studies. Using real and simulated data, the models are shown to give favourable clustering performance. The GitHub R package for this work is available at https://github.com/anjalisilva/mixMPLNFA and is released under the open-source MIT license.},
  pubstate    = {prepublished},
  keywords    = {/unread,Statistics - Computation,Statistics - Machine Learning,Statistics - Methodology},
  annotation  = {Read\_Status: New\\
                 Read\_Status\_Date: 2025-07-02T09:31:47.579Z},
  file        = {/home/louis/snap/zotero-snap/common/Zotero/storage/BXVPEIDD/Payne et al. - 2023 - Finite Mixtures of Multivariate Poisson-Log Normal Factor Analyzers for Clustering Count Data.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/L5DAS5C2/2311.html}
 }
@unpublished{bystrovaCausalDiscovery,
  title      = {Causal Discovery},
  author     = {Bystrova, Daria},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-07-02T09:34:39.476Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/NQE5DY92/Bystrova - Causal discovery.pdf}
 }
@online{braultFastConsistentAlgorithm2023,
  title       = {Fast and {{Consistent Algorithm}} for the {{Latent Block Model}}},
  author      = {Brault, Vincent and Channarond, Antoine},
  date        = {2023-03-09},
  eprint      = {1610.09005},
  eprinttype  = {arXiv},
  eprintclass = {math},
  doi         = {10.48550/arXiv.1610.09005},
  url         = {http://arxiv.org/abs/1610.09005},
  urldate     = {2025-07-09},
  abstract    = {The latent block model is used to simultaneously rank the rows and columns of a matrix to reveal a block structure. The algorithms used for estimation are often time consuming. However, recent work shows that the log-likelihood ratios are equivalent under the complete and observed (with unknown labels) models and the groups posterior distribution to converge as the size of the data increases to a Dirac mass located at the actual groups configuration. Based on these observations, the algorithm Largest Gaps is proposed in this paper to perform clustering using only the marginals of the matrix, when the number of blocks is very small with respect to the size of the whole matrix in the case of binary data. In addition, a model selection method is incorporated with a proof of its consistency. Thus, this paper shows that studying simplistic configurations (few blocks compared to the size of the matrix or very contrasting blocks) with complex algorithms is useless since the marginals already give very good parameter and classification estimates.},
  langid      = {english},
  pubstate    = {prepublished},
  keywords    = {/unread,Mathematics - Statistics Theory,Statistics - Computation,Statistics - Statistics Theory},
  annotation  = {Read\_Status: New\\
                 Read\_Status\_Date: 2025-07-09T13:58:53.533Z},
  file        = {/home/louis/snap/zotero-snap/common/Zotero/storage/5LXC6Y68/Brault et Channarond - 2023 - Fast and Consistent Algorithm for the Latent Block Model.pdf}
 }
@article{braultGeneralisationLalgorithmeLargest,
  title      = {Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique},
  author     = {Brault, Vincent and Channarond, Antoine and Robert, Valérie},
  abstract   = {The latent block model assumes there exists a distribution for each crossing between an object cluster and a variable cluster of a data table ; the cells are supposed to be independent conditionally to the choice of these clusters. To estimate the model parameters, most of algorithms are time consuming. Brault and Channarond (2016) proposed to adapt the Largest Gaps algorithm which consists in using the margins. They thus obtained a procedure which estimates all the model parameters consistently but requires a large number of observations. In this talk, we will extend the procedure to the case of any distribution having a second order moment by using an EM algorithm estimation.},
  langid     = {french},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-07-09T12:29:43.098Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/GIUNC4L3/Brault et al. - Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique.pdf}
 }
@article{channarondClassificationEstimationStochastic2012,
  title        = {Classification and Estimation in the {{Stochastic Blockmodel}} Based on the Empirical Degrees},
  author       = {Channarond, Antoine and Daudin, Jean-Jacques and Robin, Stéphane},
  date         = {2012-01-01},
  journaltitle = {Electronic Journal of Statistics},
  shortjournal = {Electron. J. Statist.},
  volume       = {6},
  publisher    = {Institute of Mathematical Statistics},
  issn         = {1935-7524},
  doi          = {10.1214/12-ejs753},
  url          = {https://projecteuclid.org/journals/electronic-journal-of-statistics/volume-6/issue-none/Classification-and-estimation-in-the-Stochastic-Blockmodel-based-on-the/10.1214/12-EJS753.full},
  urldate      = {2025-07-09},
  abstract     = {The Stochastic Blockmodel [16] is a mixture model for heterogeneous network data. Unlike the usual statistical framework, new nodes give additional information about the previous ones in this model. Thereby the distribution of the degrees concentrates in points conditionally on the node class. We show under a mild assumption that classification, estimation and model selection can actually be achieved with no more than the empirical degree data. We provide an algorithm able to process very large networks and consistent estimators based on it. In particular, we prove a bound of the probability of misclassification of at least one node, including when the number of classes grows.},
  issue        = {none},
  langid       = {english},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-07-09T13:59:33.921Z},
  file         = {/home/louis/snap/zotero-snap/common/Zotero/storage/8TL8AJ2G/Channarond et al. - 2012 - Classification and estimation in the Stochastic Blockmodel based on the empirical degrees.pdf}
 }
--- a/suivi/2025-50/2025-50.qmd
+++ b/suivi/2025-50/2025-50.qmd
@ -0,0 +1,185 @@
 ---
 title: "Bilan semaine 50 2025 : 08 décembre - 12 décembre"
 categories: [colBiSBM, inférence, GNN]
 date: 2025 12 12
 date-modified: last-modified
 bibliography: references.bib
 # from: markdown+latex_macros
 ---
 {{< include /_macros.tex >}}
 ## TODO List
 - ⚠️ IL Y A UNE TYPO SUR LE SIGNE DE L'ENTROPIE POUR LE PAPIER: $- \mathcal{H}$ au lieu de $+\mathcal{H}$
 - Codes pour le papier :
    - Nettoyer les scripts
    - Faire un joli README
    - ❓Faire des notebooks
 - Faire tourner clustering sur Trojelsgaard
 - Réussir à reproduire résultat de @abramovStructureKnowsBest
 - Maitriser graphtools de Peixoto pour essayer d'utiliser l'arbre taxonomique sur le graphe de cooccurrence inféré par SparCC
 - Maitriser SparCC
 - Faire LBM sur niveau taxonomique grossier, initialiser avec le résultat pour un niveau plus fin et ainsi de suite.
 - Clustering unipartite j'ai cassé une fonction de distance à vérifier et réparer 
 - Pour clustering de collections sur données ~~réelles~~ :    
    &rarr; L'intuition de Pierre semble être confirmée : les dissimilarités semblent arrêter de varier sensiblement pour de grandes valeurs de $(Q_1,Q_2)$.
 - 👶 (délégué à Mona) Clustering sur Doré :
    - Regarder pour les couples date+nom les études et le nombre de réseaux analysables (Possible demander à Élisa)
        - ⌛ Chamberlain et al semble intéressant à regarder ! Voir le Rmarkdown
    - Clusteriser sur la base des noms et voir parmi les réseaux Européens (désagrégés ?)
    - Si M > 10, alors voir si je retrouve les mêmes résultats que dans les études.
    - Regarder *Largest gap* sur réseaux Doré
    - ⌛ Essayer *clustering* sur `supinfo`
        - CAH et Kmeans tendent vers faire $K = 13$ clusters sur les supinfos
        - Enrichir avec des métriques sur les réseaux (nestedness, connectance autres ?)
        - Demander à Elisa pour la signification des métadonnées 
        - Demander à Elisa une fois vu cohérences de groupe voir pour interprétation écologiques ?
        - Algo de clustering sur les groupes trouvés
 ### Inférence et microbes
 #### Modèle avec covariables sur probas d'appartenances aux groupes
 Toujours modèle LBM mais avec probas d'appartenance pour les colonnes variables:
 \begin{align*}
 Z_i &\sim \mathcal{M}(1; \pi_1, \dots, \pi_Q), \sum_{q=1}^{Q} \pi_q = 1\\
 W_j &\sim \mathcal{M}(1; \rho_1^j, \dots, \rho_R^j), \sum_{r=1}^{R} \rho_r^j = 1\\
 Y_{i,j}&\mid Z_i = q, W_j = r \sim \mathcal{F}(\alpha_{qr})
 \end{align*}
 Inférence variationnelle donc $\ell(Y;\pmb{\theta}) \geq \mathcal{J}(\mathcal{R},\pmb{\theta})$ avec
 $$
 \ELBORTheta = \sum_{i = 1}^{n_1}\sum_{j=1}^{n_2}\sum_{q \in \mathcal{Q}_1} \sum_{r \in \mathcal{Q}_2} \tau_{iq}^{1} \tau_{jr}^{2} \log f(Y_{ij}; \alpha_{qr})
    + \sum_{i=1}^{n_1} \sum_{q \in \mathcal{Q}_1} \tau_{iq}^{1} \log \pi_{\color{black}q} + \sum_{j=1}^{n_2} \sum_{r \in \mathcal{Q}_2} \tau_{jr}^{2} \log \rho_{\color{black}r}^{j}       \\
    - \sum_{i=1}^{n_1} \sum_{q \in \mathcal{Q}_1} \tau_{iq}^{1} \log \tau_{iq}^{1} - \sum_{j=1}^{n_2} \sum_{r \in \mathcal{Q}_2} \tau_{jr}^{2} \log \tau_{jr}^{2}
 $$
 Plusieurs possibilités pour la définition de $\rho_r^j$
 ##### Modèle 1 (Tabouy)
 Dénominateur pas correct, ne somme pas à 1.
 $\rho_r^j = \frac{\exp{\beta_r X_j\mathbf{1}_{\{r\neq R\}}}}{1+\sum_{s=1}^{R-1} \beta_s X_j}, \beta_R = 0$ et $\rho_R^{j} = \frac{1}{1+\sum_{s=1}^{R-1} \beta_s X_j}$ (pas de compréhension intuitive)
 La partie pertinente de l'ELBO devient:
 $$
  P((\beta_r)_{r=1,\dots,R}, (X_j)_{j=1,\dots,n_2}, (\tau_{jr})_{\substack{j=1,\dots,n_2\\r=1,\dots,R}} ) =  \sum_{j=1}^{n_2} \sum_{r=1}^{R} [\tau_{jr} (\beta_r X_j \mathbb{1}_{r\neq R} - \log (1+\sum_{s=1}^{R-1} \beta_s X_j))]
 $$
 Et on obtient la dérivée partielle par rapport à $\beta_t$ comme:
 \begin{align*}
 \dfrac{\partial P}{\partial \beta_t}&((\beta_r)_{r=1,\dots,R}, (X_j)_{j=1,\dots,n_2}, (\tau_{jr})_{\substack{j=1,\dots,n_2\\r=1,\dots,R}} ) = \sum_{j=1}^{n_2} \biggl[ \tau_{jt} X_j - \frac{X_j}{1+\sum_{s=1}^{R-1} \exp{\beta_s X_j}} \biggr]\\
 & = \sum_{j=1}^{n_2} \biggl[\bigl(\tau_{jt} -  \frac{1}{1+\sum_{s=1}^{R-1} \beta_s X_j} \bigr) X_j\biggr] = \sum_{j=1}^{n_2} \biggl[\bigl(\tau_{jt} - \rho_R^j \bigr) X_j\biggr] 
 \end{align*}
 ❓ Gradient mesure l'écart entre probas a posteriori et la proba a priori du groupe de référence ?
 **Conclusion**: Il manque l'exponentielle au dénominateur ; cette formulation ne somme donc pas à 1.
 ##### Modèle Sophie
 Avec $\rho_r^j = \frac{\exp{\beta_r X_j}}{\sum_{s=1}^{R} \exp{\beta_s X_j}} = \sigma(\pmb{\beta} \pmb{X})_{r,j}$, où $\sigma$ désigne le softmax. Mais il y a besoin de poser une contrainte sur l'un des $(\beta_r)_{r=1,\dots,R}$, ici $\beta_R = 0$.
 La partie pertinente de l'ELBO devient:
 $$
  P((\beta_r)_{r=1,\dots,R}, (X_j)_{j=1,\dots,n_2}, (\tau_{jr})_{\substack{j=1,\dots,n_2\\r=1,\dots,R}} ) =  \sum_{j=1}^{n_2} \sum_{r=1}^{R} [\tau_{jr} (\beta_r X_j - \log (\sum_{s=1}^{R} \exp{\beta_s X_j}))]
 $$
 Et on obtient la dérivée partielle par rapport à $\beta_t$ comme:
 \begin{align*}
 \dfrac{\partial P}{\partial \beta_t}&((\beta_r)_{r=1,\dots,R}, (X_j)_{j=1,\dots,n_2}, (\tau_{jr})_{\substack{j=1,\dots,n_2\\r=1,\dots,R}} ) = \sum_{j=1}^{n_2} \biggl[ \tau_{jt} X_j - \frac{X_j \exp{\beta_t X_j}}{\sum_{s=1}^{R} \exp{\beta_s X_j}} \biggr]\\
 & = \sum_{j=1}^{n_2} \biggl[\bigl(\tau_{jt} - \sigma(\pmb{\beta} \pmb{X})_{t,j}\bigr) X_j\biggr] = \sum_{j=1}^{n_2} \biggl[\bigl(\tau_{jt} - \rho_t^j \bigr) X_j\biggr] 
 \end{align*}
 #### Bibliographie: à lire, à faire
 - Lire article multi-niveaux Saint-Clair
 - 🆕 🔎 Trouver des papiers: 
    - LBM Negative Binomial
    - Network inference through sample comparison
 - Idée des groupes sur la base de distance phylogénétique:
    - En train de comprendre les distances que phyloseq permet de calculer sur notre exemple
    - En train de lire sur Principal coordinate analysis : https://openplantpathology.github.io/OPP_Workshop_Multivariate/2-MV_PCO.html
    - Parametric t-SNE pour avoir une unique représentation latente (inconvénient utilise du Deep Learning)
    - Lire Papier UniFrac
 #### Réflexion
 - easy16s : se renseigner sur 
    - $\alpha$, $\beta$ diversité
    - Heatmap
 - Regarder **SPARTA** Rennes
 - Ecrire et étudier les modèles pour différents niveaux taxonomiques.
 - 🆕 Regarder NetComi
 - 🆕 Regarder OneNet car agrégation plus robuste
 - 🆕 Réfléchir au sens d'agréger les données ou de les diviser
 #### Écrire et faire tourner
 - Lancer *colBiSBM* sur $OTU\times Sample$ &rarr; problème du chargement en mémoire des données à voir
 - Lancer *colSBM* sur $OTU\times OTU$
 - TabNet pratiquer les [exercices](https://github.com/cregouby/Tutoriel_torch)
 - 🆕 SparCC à différents niveaux
 - 🆕 SBM à différents niveaux
 - 🆕⌛ Tree-PLN à différents niveaux
 #### Causalité
 Plus sur le temps long, à regarder
 - GT causalité
 - Daria Bystrova lire présentation @bystrovaCausalDiscovery (Meek rules, V-structure)
 ## A discuter
 - 🆕 Voir pour des Réseaux / GDR où aller
 - 🆕 Chercher des cours à suivre
 ## Biblio à faire
 - Regarder Transport optimal pour graphes bipartites.
 ## Lectures en cours 📚
 ### HDR Vincent Brault
 - ⌛ Chap 2 : Creuser l'idée de maximiser l'énergie libre, très intéressant regarder le critère CARI et lire Robert et al 2021. Actuellement p32 du manuscrit
 - Chap 3
 ### OT
 - ⌛ @mazeletUnsupervisedLearningOptimal Intéressant pour le transport optimal entre graphes de tailles différentes | Regarder si la régularisation entropique ne marche pas bien pour le graphe.
 - ⌛ @nennaLecture2Entropic Pour comprendre le problème d'OT régularisé pour l'entropie.
 - ⌛ @nennaLecture1Monge
 ### Inférence de graphes
 - ⌛ @aitchisonStatisticalAnalysisCompositional1982a, en cours
 - ❗📖 @payneFiniteMixturesMultivariate2023 sur MixMPLN
 ### Causalité
 - ❗📖 @bystrovaCausalDiscovery
 ### Largest Gaps
 - ❗📖 @braultFastConsistentAlgorithm2023
 - ❗📖 @channarondClassificationEstimationStochastic2012 le papier qui introduit le *Largest Gaps*
--- a/suivi/2025-50/references.bib
+++ b/suivi/2025-50/references.bib
@ -0,0 +1,155 @@
@article{mazeletUnsupervisedLearningOptimal,
  title      = {Unsupervised {{Learning}} for {{Optimal Transport}} Plan Prediction between Unbalanced Graphs},
  author     = {Mazelet, Sonia and Flamary, Rémi and Thirion, Bertrand},
  abstract   = {Optimal transport between graphs, based on Gromov-Wasserstein and other extensions, is a powerful tool for comparing and aligning graph structures. However, solving the associated non-convex optimization problems is computationally expensive, which limits the scalability of these methods to large graphs. In this work, we present Unbalanced Learning of Optimal Transport (ULOT), a deep learning method that predicts optimal transport plans between two graphs. Our method is trained by minimizing the fused unbalanced Gromov-Wasserstein (FUGW) loss. We propose a novel neural architecture with cross-attention that is conditioned on the FUGW tradeoff hyperparameters. We evaluate ULOT on synthetic stochastic block model (SBM) graphs and on real cortical surface data obtained from fMRI. ULOT predicts transport plans with competitive loss up to two orders of magnitude faster than classical solvers. Furthermore, the predicted plan can be used as a warm start for classical solvers to accelerate their convergence. Finally, the predicted transport plan is fully differentiable with respect to the graph inputs and FUGW hyperparameters, enabling the optimization of functionals of the ULOT plan.},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-11T09:08:09.864Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/HPZEYMM9/Mazelet et al. - Unsupervised Learning for Optimal Transport plan prediction between unbalanced graphs.pdf}
 }
@article{abramovStructureKnowsBest,
  title      = {Structure Knows Best: Predicting Ecological Interactions across Space through Pairwise Integration of Latent Network Patterns},
  author     = {Abramov, Kesem and Biton, Barry and Galai, Geut and Puzis, Rami and Pilosof, Shai},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-12-01T08:50:29.812Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/DX44VXQA/Abramov et al. - Structure knows best predicting ecological interactions across space through pairwise integration o.pdf}
 }
@article{nennaLecture2Entropic,
  title      = {Lecture 2: {{Entropic Optimal Transport}}},
  author     = {Nenna, Luca},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-11T16:06:28.547Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/WGFIISDB/Nenna - Lecture 2 Entropic Optimal Transport.pdf}
 }
@article{nennaLecture1Monge,
  title      = {Lecture 1 {{Monge}} and {{Kantorovich}} Problems: From Primal to Dual},
  author     = {Nenna, Luca},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-13T09:24:13.832Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/7LVQPD6D/Nenna - Lecture 1 Monge and Kantorovich problems from primal to dual.pdf}
 }
@article{Morton2021.11.09.467939,
  title        = {Scalable Estimation of Microbial Co-Occurrence Networks with {{Variational Autoencoders}}},
  author       = {Morton, James T. and Silverman, Justin and Tikhonov, Gleb and Lähdesmäki, Harri and Bonneau, Rich},
  date         = {2021},
  journaltitle = {bioRxiv : the preprint server for biology},
  shortjournal = {bioRxiv},
  eprint       = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939.full.pdf},
  publisher    = {Cold Spring Harbor Laboratory},
  doi          = {10.1101/2021.11.09.467939},
  url          = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939},
  abstract     = {Estimating microbe-microbe interactions is critical for understanding the ecological laws governing microbial communities. Rapidly decreasing sequencing costs have promised new opportunities to estimate microbe-microbe interactions across thousands of uncultured, unknown microbes. However, typical microbiome datasets are very high dimensional and accurate estimation of microbial correlations requires tens of thousands of samples, exceeding the computational capabilities of existing methodologies. Furthermore, the vast majority of microbiome studies collect compositional metagenomics data which enforces a negative bias when computing microbe-microbe correlations. The Multinomial Logistic Normal (MLN) distribution has been shown to be effective at inferring microbe-microbe correlations, however scalable Bayesian inference of these distributions has remained elusive. Here, we show that carefully constructed Variational Autoencoders (VAEs) augmented with the Isometric Log-ratio (ILR) transform can estimate low-rank MLN distributions thousands of times faster than existing methods. These VAEs can be trained on tens of thousands of samples, enabling co-occurrence inference across tens of thousands of microbes without regularization. The latent embedding distances computed from these VAEs are competitive with existing beta-diversity methods across a variety of mouse and human microbiome classification and regression tasks, with notable improvements on longitudinal studies.Competing Interest StatementThe authors have declared no competing interest.},
  elocation-id = {2021.11.09.467939},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-06-30T14:17:29.518Z}
 }
@article{aitchisonStatisticalAnalysisCompositional1982a,
  title        = {The {{Statistical Analysis}} of {{Compositional Data}}},
  author       = {Aitchison, J.},
  date         = {1982},
  journaltitle = {Journal of the Royal Statistical Society. Series B (Methodological)},
  volume       = {44},
  number       = {2},
  eprint       = {2345821},
  eprinttype   = {jstor},
  pages        = {139--177},
  publisher    = {[Royal Statistical Society, Oxford University Press]},
  issn         = {0035-9246},
  url          = {https://www.jstor.org/stable/2345821},
  urldate      = {2025-05-07},
  abstract     = {The simplex plays an important role as sample space in many practical situations where compositional data, in the form of proportions of some whole, require interpretation. It is argued that the statistical analysis of such data has proved difficult because of a lack both of concepts of independence and of rich enough parametric classes of distributions in the simplex. A variety of independence hypotheses are introduced and interrelated, and new classes of transformed-normal distributions in the simplex are provided as models within which the independence hypotheses can be tested through standard theory of parametric hypothesis testing. The new concepts and statistical methodology are illustrated by a number of applications.},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-05-07T07:43:38.485Z},
  file         = {/home/louis/snap/zotero-snap/common/Zotero/storage/S97URH4Y/Aitchison - 1982 - The Statistical Analysis of Compositional Data.pdf}
 }
@online{payneFiniteMixturesMultivariate2023,
  title       = {Finite {{Mixtures}} of {{Multivariate Poisson-Log Normal Factor Analyzers}} for {{Clustering Count Data}}},
  author      = {Payne, Andrea and Silva, Anjali and Rothstein, Steven J. and McNicholas, Paul D. and Subedi, Sanjeena},
  date        = {2023-11-13},
  eprint      = {2311.07762},
  eprinttype  = {arXiv},
  eprintclass = {stat},
  doi         = {10.48550/arXiv.2311.07762},
  url         = {http://arxiv.org/abs/2311.07762},
  urldate     = {2025-07-02},
  abstract    = {A mixture of multivariate Poisson-log normal factor analyzers is introduced by imposing constraints on the covariance matrix, which resulted in flexible models for clustering purposes. In particular, a class of eight parsimonious mixture models based on the mixtures of factor analyzers model are introduced. Variational Gaussian approximation is used for parameter estimation, and information criteria are used for model selection. The proposed models are explored in the context of clustering discrete data arising from RNA sequencing studies. Using real and simulated data, the models are shown to give favourable clustering performance. The GitHub R package for this work is available at https://github.com/anjalisilva/mixMPLNFA and is released under the open-source MIT license.},
  pubstate    = {prepublished},
  keywords    = {/unread,Statistics - Computation,Statistics - Machine Learning,Statistics - Methodology},
  annotation  = {Read\_Status: New\\
                 Read\_Status\_Date: 2025-07-02T09:31:47.579Z},
  file        = {/home/louis/snap/zotero-snap/common/Zotero/storage/BXVPEIDD/Payne et al. - 2023 - Finite Mixtures of Multivariate Poisson-Log Normal Factor Analyzers for Clustering Count Data.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/L5DAS5C2/2311.html}
 }
@unpublished{bystrovaCausalDiscovery,
  title      = {Causal Discovery},
  author     = {Bystrova, Daria},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-07-02T09:34:39.476Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/NQE5DY92/Bystrova - Causal discovery.pdf}
 }
@online{braultFastConsistentAlgorithm2023,
  title       = {Fast and {{Consistent Algorithm}} for the {{Latent Block Model}}},
  author      = {Brault, Vincent and Channarond, Antoine},
  date        = {2023-03-09},
  eprint      = {1610.09005},
  eprinttype  = {arXiv},
  eprintclass = {math},
  doi         = {10.48550/arXiv.1610.09005},
  url         = {http://arxiv.org/abs/1610.09005},
  urldate     = {2025-07-09},
  abstract    = {The latent block model is used to simultaneously rank the rows and columns of a matrix to reveal a block structure. The algorithms used for estimation are often time consuming. However, recent work shows that the log-likelihood ratios are equivalent under the complete and observed (with unknown labels) models and the groups posterior distribution to converge as the size of the data increases to a Dirac mass located at the actual groups configuration. Based on these observations, the algorithm Largest Gaps is proposed in this paper to perform clustering using only the marginals of the matrix, when the number of blocks is very small with respect to the size of the whole matrix in the case of binary data. In addition, a model selection method is incorporated with a proof of its consistency. Thus, this paper shows that studying simplistic configurations (few blocks compared to the size of the matrix or very contrasting blocks) with complex algorithms is useless since the marginals already give very good parameter and classification estimates.},
  langid      = {english},
  pubstate    = {prepublished},
  keywords    = {/unread,Mathematics - Statistics Theory,Statistics - Computation,Statistics - Statistics Theory},
  annotation  = {Read\_Status: New\\
                 Read\_Status\_Date: 2025-07-09T13:58:53.533Z},
  file        = {/home/louis/snap/zotero-snap/common/Zotero/storage/5LXC6Y68/Brault et Channarond - 2023 - Fast and Consistent Algorithm for the Latent Block Model.pdf}
 }
@article{braultGeneralisationLalgorithmeLargest,
  title      = {Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique},
  author     = {Brault, Vincent and Channarond, Antoine and Robert, Valérie},
  abstract   = {The latent block model assumes there exists a distribution for each crossing between an object cluster and a variable cluster of a data table ; the cells are supposed to be independent conditionally to the choice of these clusters. To estimate the model parameters, most of algorithms are time consuming. Brault and Channarond (2016) proposed to adapt the Largest Gaps algorithm which consists in using the margins. They thus obtained a procedure which estimates all the model parameters consistently but requires a large number of observations. In this talk, we will extend the procedure to the case of any distribution having a second order moment by using an EM algorithm estimation.},
  langid     = {french},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-07-09T12:29:43.098Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/GIUNC4L3/Brault et al. - Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique.pdf}
 }
@article{channarondClassificationEstimationStochastic2012,
  title        = {Classification and Estimation in the {{Stochastic Blockmodel}} Based on the Empirical Degrees},
  author       = {Channarond, Antoine and Daudin, Jean-Jacques and Robin, Stéphane},
  date         = {2012-01-01},
  journaltitle = {Electronic Journal of Statistics},
  shortjournal = {Electron. J. Statist.},
  volume       = {6},
  publisher    = {Institute of Mathematical Statistics},
  issn         = {1935-7524},
  doi          = {10.1214/12-ejs753},
  url          = {https://projecteuclid.org/journals/electronic-journal-of-statistics/volume-6/issue-none/Classification-and-estimation-in-the-Stochastic-Blockmodel-based-on-the/10.1214/12-EJS753.full},
  urldate      = {2025-07-09},
  abstract     = {The Stochastic Blockmodel [16] is a mixture model for heterogeneous network data. Unlike the usual statistical framework, new nodes give additional information about the previous ones in this model. Thereby the distribution of the degrees concentrates in points conditionally on the node class. We show under a mild assumption that classification, estimation and model selection can actually be achieved with no more than the empirical degree data. We provide an algorithm able to process very large networks and consistent estimators based on it. In particular, we prove a bound of the probability of misclassification of at least one node, including when the number of classes grows.},
  issue        = {none},
  langid       = {english},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-07-09T13:59:33.921Z},
  file         = {/home/louis/snap/zotero-snap/common/Zotero/storage/8TL8AJ2G/Channarond et al. - 2012 - Classification and estimation in the Stochastic Blockmodel based on the empirical degrees.pdf}
 }
--- a/suivi/2025-51/2025-51.qmd
+++ b/suivi/2025-51/2025-51.qmd
@ -0,0 +1,334 @@
 ---
 title: "Bilan semaine 51 2025 : 15 décembre - 19 décembre"
 categories: [colBiSBM, inférence, GNN]
 date: 2025 12 19
 date-modified: last-modified
 bibliography: references.bib
 # from: markdown+latex_macros
 ---
 {{< include /_macros.tex >}}
 ## TODO List
 - ✅ **C'est fait** Passer version article flat dans Gitlab du papier et nettoyer au minimum sur une branche clean.
 - ✅ Corrigée !⚠️ IL Y A UNE TYPO SUR LE SIGNE DE L'ENTROPIE POUR LE PAPIER: $- \mathcal{H}$ au lieu de $+\mathcal{H}$
 - ✅ Faire tourner clustering sur Trojelsgaard. **Fait mais ne sépare personne**.
 - Petites opérations sur les OTUs (regarder la matrice dans les yeux):
    - Ranger les OTUs par variances (i.e. `sd(OTU_j)`)
    - ✅ **Dans un RMD sur Human Microbiome Compendium** Dessiner les graphiques : $\Var[OTU] = f(\Esp[OTU]), \frac{\Var[OTU]}{\Esp[OTU]^2} = f(\Esp[OTU])$ et $\frac{\Var[OTU]}{\Esp[OTU]} = f(\Esp[OTU]) (\approx 1)$ si les données suivent une loi de Poisson. 
        - HMC sur-dispersés (au-dessus bissectrice)
        - Enterotype phyloseq sous-disp
    - Regarder la proportion de 1. taxons rares, 2. zéros.
    - Faire des coupures selon niveaux taxonomiques et regarder si $\Var_{\text{intra}} \approx \Var_{\text{inter}}$
    - *Bonus*: faire ça dans qmd et voir si forge permet gitlab pages
 - ✅ Faire tourner un LBM sur Human Gut et voir si ça plante sinon, **ça plante, la ram est surchargée.**
    - ❎⌛ Je tente avec SparseBM de JBL sur Python. **Ne gère pas le Poisson**
    - Faire LBM sur niveau taxonomique grossier, initialiser avec le résultat pour un niveau plus fin et ainsi de suite.
 - Increasing size :
 ![](figs/tendance_temps.png)
 - ⌛ Prendre jeu de données exemple de phyloseq : 
    - ✅ 😞 enterotype tourne mais ne donne pas de bons résultats (il semble y avoir deux blocs d'échantillons, mais le modèle ne les voit pas).
    - 🕑 des jeux de données de Mahendra ne tournent pas (phase forward interminable).
 - Relire @peixotoHierarchicalBlockStructures2014
    - Regarder les gens qui citent les travaux de Peixoto
 - Implémentation `blockmodels` LBM avec covariables sur proportions (voir @eq-modele-covar-prop)
 :::{.callout-note title="Idées"}
 - Travailler sur Fungus Tree network
 - Comparaison covar prop avec GREMLINS multipartite sur (log(dist_phylo), fungus-tree)
 - Trouver manière de faire un compromis : $\ell(Y,Z,W;\theta) - \lambda d(C(W),C_0)$ avec $C(W)$ le clustering seulement sur la base de la structure LBM et $C_0$ le clustering de l'arbre. Problème $d$ est une distance entre partition, comment optimiser dessus ?
 - ⌛ Mise à jour partielle des $\tau$ : ce qui pose soucis c'est les gros calculs matriciels (c'est vraiment vrai?). Donc sorte de "stochastic" VEM où on update seulement une partie des $\tau$ à chaque itération. Et échantillonnage stratifié selon l'arbre ?
    - ⌛ Simulations avec $n_2$ croissant lancées sur Migale
    - Réimplémenté VE Bernoulli dans colSBM pour Bipartite et début implémentation Stochastic VE. En fait, le problème vient des calculs matriciels $Y\times(\tau^{(1)})^{\top}$ ($n_2^2$), d'où le besoin de sous-échantillonner les noeuds de l'autre dimension à mettre à jour.
 - ✅ **Inutile car besoin du primal** Chercher à formuler le problème dual (s'il existe?) de l'optimisation du LBM. Peut-être possible d'aller plus vite alors ? @eq-dual 
 :::
 - Clustering unipartite j'ai cassé une fonction de distance à vérifier et réparer
 - Codes pour le papier :
    - Nettoyer les scripts
    - Faire un joli README
    - ❓Faire des notebooks
 - Réussir à reproduire résultat de @abramovStructureKnowsBest
 - Maitriser graphtools de Peixoto pour essayer d'utiliser l'arbre taxonomique sur le graphe de cooccurrence inféré par SparCC
 - Maitriser SparCC
 - 👶 (délégué à Mona) Clustering sur Doré :
    - Regarder pour les couples date+nom les études et le nombre de réseaux analysables (Possible demander à Élisa)
        - ⌛ Chamberlain et al semble intéressant à regarder ! Voir le Rmarkdown
    - Clusteriser sur la base des noms et voir parmi les réseaux Européens (désagrégés ?)
    - Si M > 10, alors voir si je retrouve les mêmes résultats que dans les études.
    - Regarder *Largest gap* sur réseaux Doré
    - ⌛ Essayer *clustering* sur `supinfo`
        - CAH et Kmeans tendent vers faire $K = 13$ clusters sur les supinfos
        - Enrichir avec des métriques sur les réseaux (nestedness, connectance autres ?)
        - Demander à Elisa pour la signification des métadonnées 
        - Demander à Elisa une fois vu cohérences de groupe voir pour interprétation écologiques ?
        - Algo de clustering sur les groupes trouvés
 ### Inférence et microbes
 #### Modèle avec covariables sur probas d'appartenances aux groupes
 Toujours modèle LBM mais avec probas d'appartenance pour les colonnes variables:
 \begin{align*}
 Z_i &\sim \mathcal{M}(1; \pi_1, \dots, \pi_Q), \sum_{q=1}^{Q} \pi_q = 1\\
 W_j &\sim \mathcal{M}(1; \rho_1^j, \dots, \rho_R^j), \sum_{r=1}^{R} \rho_r^j = 1\\
 Y_{i,j}&\mid Z_i = q, W_j = r \sim \mathcal{F}(\alpha_{qr})
 \end{align*}
 Inférence variationnelle donc $\ell(Y;\pmb{\theta}) \geq \mathcal{J}(\mathcal{R},\pmb{\theta})$ avec
 $$
 \ELBORTheta = \sum_{i = 1}^{n_1}\sum_{j=1}^{n_2}\sum_{q \in \mathcal{Q}_1} \sum_{r \in \mathcal{Q}_2} \tau_{iq}^{1} \tau_{jr}^{2} \log f(Y_{ij}; \alpha_{qr})
    + \sum_{i=1}^{n_1} \sum_{q \in \mathcal{Q}_1} \tau_{iq}^{1} \log \pi_{\color{black}q} + \sum_{j=1}^{n_2} \sum_{r \in \mathcal{Q}_2} \tau_{jr}^{2} \log \rho_{\color{black}r}^{j}       \\
    - \sum_{i=1}^{n_1} \sum_{q \in \mathcal{Q}_1} \tau_{iq}^{1} \log \tau_{iq}^{1} - \sum_{j=1}^{n_2} \sum_{r \in \mathcal{Q}_2} \tau_{jr}^{2} \log \tau_{jr}^{2}
 $$
 Plusieurs possibilités pour la définition de $\rho_r^j$
 ##### Modèle Sophie
 Avec $\rho_r^j = \frac{\exp{\beta_r X_j}}{\sum_{s=1}^{R} \exp{\beta_s X_j}} = \sigma(\pmb{\beta} \pmb{X})_{r,j}$, où $\sigma$ désigne le softmax. Mais il y a besoin de poser une contrainte sur l'un des $(\beta_r)_{r=1,\dots,R}$, ici $\beta_R = 0$.
 La partie pertinente de l'ELBO devient:
 $$
  P((\beta_r)_{r=1,\dots,R}, (X_j)_{j=1,\dots,n_2}, (\tau_{jr})_{\substack{j=1,\dots,n_2\\r=1,\dots,R}} ) =  \sum_{j=1}^{n_2} \sum_{r=1}^{R} [\tau_{jr} (\beta_r X_j - \log (\sum_{s=1}^{R} \exp{\beta_s X_j}))]
 $$ {#eq-modele-covar-prop}
 Et on obtient la dérivée partielle par rapport à $\beta_t$ comme:
 \begin{align*}
 \dfrac{\partial P}{\partial \beta_t}&((\beta_r)_{r=1,\dots,R}, (X_j)_{j=1,\dots,n_2}, (\tau_{jr})_{\substack{j=1,\dots,n_2\\r=1,\dots,R}} ) = \sum_{j=1}^{n_2} \biggl[ \tau_{jt} X_j - \frac{X_j \exp{\beta_t X_j}}{\sum_{s=1}^{R} \exp{\beta_s X_j}} \biggr]\\
 & = \sum_{j=1}^{n_2} \biggl[\bigl(\tau_{jt} - \sigma(\pmb{\beta} \pmb{X})_{t,j}\bigr) X_j\biggr] = \sum_{j=1}^{n_2} \biggl[\bigl(\tau_{jt} - \rho_t^j \bigr) X_j\biggr] 
 \end{align*}
 #### Idée du problème dual
 Les distributions variationnelles sont définies par :
 $$
 q(Z,W)
 =
 \prod_{i=1}^{n_1} q_i(Z_i)
 \prod_{j=1}^{n_2} q_j(W_j),
 $$
 avec
 $$
 q_i(Z_i=q)=\tau_{iq}^{(1)}, 
 \qquad
 q_j(W_j=r)=\tau_{jr}^{(2)}.
 $$
 Les contraintes de normalisation sont :
 $$
 \sum_{q=1}^Q \tau_{iq}^{(1)} = 1,
 \qquad
 \sum_{r=1}^R \tau_{jr}^{(2)} = 1.
 $$
 ---
 ##### Lagrangien
 Le lagrangien du problème variationnel s’écrit :
 $$
 \mathcal{L}\!\left(
 \tau^{(1)},\tau^{(2)},(\lambda_i)_{i=1}^{n_1},(\mu_j)_{j=1}^{n_2}
 \right)
 =
 \ELBORTheta
 +
 \sum_{i=1}^{n_1} \lambda_i
 \left(1-\sum_{q=1}^Q \tau_{iq}^{(1)}\right)
 +
 \sum_{j=1}^{n_2} \mu_j
 \left(1-\sum_{r=1}^R \tau_{jr}^{(2)}\right),
 $$
 où $\ELBORTheta$ désigne la borne inférieure variationnelle
 associée au modèle et aux paramètres $\Theta$.
 ---
 ##### Problème primal (conditions d’optimalité)
 En dérivant le lagrangien par rapport aux variables variationnelles
 $\tau^{(1)}$ et $\tau^{(2)}$, puis en égalisant à zéro, on obtient
 les équations de point fixe suivantes :
 $$
 \tau_{iq}^{(1)}
 \propto
 \pi_q^{(t)}
 \prod_{j=1}^{n_2}
 \prod_{r=1}^{R}
 f\!\left(Y_{ij};\alpha_{qr}^{(t)}\right)^{\tau_{jr}^{(2),(t+1)}},
 \quad
 \forall i=1,\dots,n_1,\;
 q=1,\dots,Q,
 $$
 $$
 \tau_{jr}^{(2)}
 \propto
 \rho_r^{(t)}
 \prod_{i=1}^{n_1}
 \prod_{q=1}^{Q}
 f\!\left(Y_{ij};\alpha_{qr}^{(t)}\right)^{\tau_{iq}^{(1),(t+1)}},
 \quad
 \forall j=1,\dots,n_2,\;
 r=1,\dots,R,
 $$
 où :
 - $\pi_q^{(t)}$ et $\rho_r^{(t)}$ sont les proportions de classes,
 - $f(\cdot;\alpha_{qr})$ est la loi d'émission du modèle,
 - $\alpha_{qr}^{(t)}$ désigne les paramètres de bloc à l’itération $t$.
 ---
 ##### Constantes de normalisation
 Les constantes de normalisation associées sont données par :
 $$
 T^{(1),(t)}_i
 =
 \sum_{q=1}^{Q}
 \pi_q^{(t)}
 \exp\!\left(
 \sum_{j=1}^{n_2}
 \sum_{r=1}^{R}
 \tau_{jr}^{(2)}
 \log f\!\left(Y_{ij};\alpha_{qr}^{(t)}\right)
 \right),
 $$
 $$
 T^{(2),(t)}_j
 =
 \sum_{r=1}^{R}
 \rho_r^{(t)}
 \exp\!\left(
 \sum_{i=1}^{n_1}
 \sum_{q=1}^{Q}
 \tau_{iq}^{(1)}
 \log f\!\left(Y_{ij};\alpha_{qr}^{(t)}\right)
 \right).
 $$
 Ainsi, les mises à jour normalisées s’écrivent :
 $$
 \tau_{iq}^{(1)} = \frac{1}{T^{(1),(t)}_i}(\cdots),
 \qquad
 \tau_{jr}^{(2)} = \frac{1}{T^{(2),(t)}_j}(\cdots).
 $$
 ---
 ##### Interprétation duale
 Les multiplicateurs de Lagrange s’identifient alors à :
 $$
 \lambda_i = \log T^{(1),(t)}_i - 1,
 \qquad
 \mu_j = \log T^{(2),(t)}_j - 1,
 $$ {#eq-dual}
 et le problème dual consiste à minimiser une somme de fonctions de
 log-partition, ce qui montre que l’algorithme VEM réalise implicitement
 une descente sur le dual.
 #### Bibliographie: à lire, à faire
 - Lire article multi-niveaux Saint-Clair
 - 🆕 🔎 Trouver des papiers: 
    - LBM Negative Binomial
    - Network inference through sample comparison
 - Idée des groupes sur la base de distance phylogénétique:
    - En train de comprendre les distances que phyloseq permet de calculer sur notre exemple
    - En train de lire sur Principal coordinate analysis : https://openplantpathology.github.io/OPP_Workshop_Multivariate/2-MV_PCO.html
    - Parametric t-SNE pour avoir une unique représentation latente (inconvénient utilise du Deep Learning)
    - Lire Papier UniFrac
 #### Réflexion
 - easy16s : se renseigner sur 
    - $\alpha$, $\beta$ diversité
    - Heatmap
 - Regarder **SPARTA** Rennes
 - Ecrire et étudier les modèles pour différents niveaux taxonomiques.
 - 🆕 Regarder NetComi
 - 🆕 Regarder OneNet car agrégation plus robuste
 - 🆕 Réfléchir au sens d'agréger les données ou de les diviser
 #### Écrire et faire tourner
 - Lancer *colBiSBM* sur $OTU\times Sample$ &rarr; problème du chargement en mémoire des données à voir
 - Lancer *colSBM* sur $OTU\times OTU$
 - TabNet pratiquer les [exercices](https://github.com/cregouby/Tutoriel_torch)
 - 🆕 SparCC à différents niveaux
 - 🆕 SBM à différents niveaux
 - 🆕⌛ Tree-PLN à différents niveaux
 #### Causalité
 Plus sur le temps long, à regarder
 - GT causalité
 - Daria Bystrova lire présentation @bystrovaCausalDiscovery (Meek rules, V-structure)
 ## A discuter
 - 🆕 Voir pour des Réseaux / GDR où aller
 - 🆕 Chercher des cours à suivre
 ## Biblio à faire
 - Regarder Transport optimal pour graphes bipartites.
 ## Lectures en cours 📚
 ### HDR Vincent Brault
 - ⌛ Chap 2 : Creuser l'idée de maximiser l'énergie libre, très intéressant regarder le critère CARI et lire Robert et al 2021. Actuellement p32 du manuscrit
 - Chap 3
 ### OT
 - ⌛ @mazeletUnsupervisedLearningOptimal Intéressant pour le transport optimal entre graphes de tailles différentes | Regarder si la régularisation entropique ne marche pas bien pour le graphe.
 - ⌛ @nennaLecture2Entropic Pour comprendre le problème d'OT régularisé pour l'entropie.
 - ⌛ @nennaLecture1Monge
 ### Inférence de graphes
 - ⌛ @aitchisonStatisticalAnalysisCompositional1982a, en cours
 - ❗📖 @payneFiniteMixturesMultivariate2023 sur MixMPLN
 ### Causalité
 - ❗📖 @bystrovaCausalDiscovery
 ### Largest Gaps
 - ❗📖 @braultFastConsistentAlgorithm2023
 - ❗📖 @channarondClassificationEstimationStochastic2012 le papier qui introduit le *Largest Gaps*
--- a/suivi/2025-51/figs/tendance_temps.png
+++ b/suivi/2025-51/figs/tendance_temps.png
--- a/suivi/2025-51/references.bib
+++ b/suivi/2025-51/references.bib
@ -0,0 +1,176 @@
@article{mazeletUnsupervisedLearningOptimal,
  title      = {Unsupervised {{Learning}} for {{Optimal Transport}} Plan Prediction between Unbalanced Graphs},
  author     = {Mazelet, Sonia and Flamary, Rémi and Thirion, Bertrand},
  abstract   = {Optimal transport between graphs, based on Gromov-Wasserstein and other extensions, is a powerful tool for comparing and aligning graph structures. However, solving the associated non-convex optimization problems is computationally expensive, which limits the scalability of these methods to large graphs. In this work, we present Unbalanced Learning of Optimal Transport (ULOT), a deep learning method that predicts optimal transport plans between two graphs. Our method is trained by minimizing the fused unbalanced Gromov-Wasserstein (FUGW) loss. We propose a novel neural architecture with cross-attention that is conditioned on the FUGW tradeoff hyperparameters. We evaluate ULOT on synthetic stochastic block model (SBM) graphs and on real cortical surface data obtained from fMRI. ULOT predicts transport plans with competitive loss up to two orders of magnitude faster than classical solvers. Furthermore, the predicted plan can be used as a warm start for classical solvers to accelerate their convergence. Finally, the predicted transport plan is fully differentiable with respect to the graph inputs and FUGW hyperparameters, enabling the optimization of functionals of the ULOT plan.},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-11T09:08:09.864Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/HPZEYMM9/Mazelet et al. - Unsupervised Learning for Optimal Transport plan prediction between unbalanced graphs.pdf}
 }
@article{peixotoHierarchicalBlockStructures2014,
  title      = {Hierarchical {{Block Structures}} and {{High-Resolution Model Selection}} in {{Large Networks}}},
  author     = {Peixoto, Tiago P.},
  year       = 2014,
  month      = mar,
  journal    = {Physical Review X},
  volume     = {4},
  number     = {1},
  pages      = {011047},
  issn       = {2160-3308},
  doi        = {10.1103/PhysRevX.4.011047},
  urldate    = {2025-09-26},
  copyright  = {http://creativecommons.org/licenses/by/3.0/},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-09-26T08:27:38.586Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/R58ZQK8J/Peixoto - 2014 - Hierarchical Block Structures and High-Resolution Model Selection in Large Networks.pdf}
 }
@article{abramovStructureKnowsBest,
  title      = {Structure Knows Best: Predicting Ecological Interactions across Space through Pairwise Integration of Latent Network Patterns},
  author     = {Abramov, Kesem and Biton, Barry and Galai, Geut and Puzis, Rami and Pilosof, Shai},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-12-01T08:50:29.812Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/DX44VXQA/Abramov et al. - Structure knows best predicting ecological interactions across space through pairwise integration o.pdf}
 }
@article{nennaLecture2Entropic,
  title      = {Lecture 2: {{Entropic Optimal Transport}}},
  author     = {Nenna, Luca},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-11T16:06:28.547Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/WGFIISDB/Nenna - Lecture 2 Entropic Optimal Transport.pdf}
 }
@article{nennaLecture1Monge,
  title      = {Lecture 1 {{Monge}} and {{Kantorovich}} Problems: From Primal to Dual},
  author     = {Nenna, Luca},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-13T09:24:13.832Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/7LVQPD6D/Nenna - Lecture 1 Monge and Kantorovich problems from primal to dual.pdf}
 }
@article{Morton2021.11.09.467939,
  title        = {Scalable Estimation of Microbial Co-Occurrence Networks with {{Variational Autoencoders}}},
  author       = {Morton, James T. and Silverman, Justin and Tikhonov, Gleb and Lähdesmäki, Harri and Bonneau, Rich},
  date         = {2021},
  journaltitle = {bioRxiv : the preprint server for biology},
  shortjournal = {bioRxiv},
  eprint       = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939.full.pdf},
  publisher    = {Cold Spring Harbor Laboratory},
  doi          = {10.1101/2021.11.09.467939},
  url          = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939},
  abstract     = {Estimating microbe-microbe interactions is critical for understanding the ecological laws governing microbial communities. Rapidly decreasing sequencing costs have promised new opportunities to estimate microbe-microbe interactions across thousands of uncultured, unknown microbes. However, typical microbiome datasets are very high dimensional and accurate estimation of microbial correlations requires tens of thousands of samples, exceeding the computational capabilities of existing methodologies. Furthermore, the vast majority of microbiome studies collect compositional metagenomics data which enforces a negative bias when computing microbe-microbe correlations. The Multinomial Logistic Normal (MLN) distribution has been shown to be effective at inferring microbe-microbe correlations, however scalable Bayesian inference of these distributions has remained elusive. Here, we show that carefully constructed Variational Autoencoders (VAEs) augmented with the Isometric Log-ratio (ILR) transform can estimate low-rank MLN distributions thousands of times faster than existing methods. These VAEs can be trained on tens of thousands of samples, enabling co-occurrence inference across tens of thousands of microbes without regularization. The latent embedding distances computed from these VAEs are competitive with existing beta-diversity methods across a variety of mouse and human microbiome classification and regression tasks, with notable improvements on longitudinal studies. Competing Interest Statement: The authors have declared no competing interest.},
  elocation-id = {2021.11.09.467939},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-06-30T14:17:29.518Z}
 }
@article{aitchisonStatisticalAnalysisCompositional1982a,
  title        = {The {{Statistical Analysis}} of {{Compositional Data}}},
  author       = {Aitchison, J.},
  date         = {1982},
  journaltitle = {Journal of the Royal Statistical Society. Series B (Methodological)},
  volume       = {44},
  number       = {2},
  eprint       = {2345821},
  eprinttype   = {jstor},
  pages        = {139--177},
  publisher    = {[Royal Statistical Society, Oxford University Press]},
  issn         = {0035-9246},
  url          = {https://www.jstor.org/stable/2345821},
  urldate      = {2025-05-07},
  abstract     = {The simplex plays an important role as sample space in many practical situations where compositional data, in the form of proportions of some whole, require interpretation. It is argued that the statistical analysis of such data has proved difficult because of a lack both of concepts of independence and of rich enough parametric classes of distributions in the simplex. A variety of independence hypotheses are introduced and interrelated, and new classes of transformed-normal distributions in the simplex are provided as models within which the independence hypotheses can be tested through standard theory of parametric hypothesis testing. The new concepts and statistical methodology are illustrated by a number of applications.},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-05-07T07:43:38.485Z},
  file         = {/home/louis/snap/zotero-snap/common/Zotero/storage/S97URH4Y/Aitchison - 1982 - The Statistical Analysis of Compositional Data.pdf}
 }
@online{payneFiniteMixturesMultivariate2023,
  title       = {Finite {{Mixtures}} of {{Multivariate Poisson-Log Normal Factor Analyzers}} for {{Clustering Count Data}}},
  author      = {Payne, Andrea and Silva, Anjali and Rothstein, Steven J. and McNicholas, Paul D. and Subedi, Sanjeena},
  date        = {2023-11-13},
  eprint      = {2311.07762},
  eprinttype  = {arXiv},
  eprintclass = {stat},
  doi         = {10.48550/arXiv.2311.07762},
  url         = {http://arxiv.org/abs/2311.07762},
  urldate     = {2025-07-02},
  abstract    = {A mixture of multivariate Poisson-log normal factor analyzers is introduced by imposing constraints on the covariance matrix, which resulted in flexible models for clustering purposes. In particular, a class of eight parsimonious mixture models based on the mixtures of factor analyzers model are introduced. Variational Gaussian approximation is used for parameter estimation, and information criteria are used for model selection. The proposed models are explored in the context of clustering discrete data arising from RNA sequencing studies. Using real and simulated data, the models are shown to give favourable clustering performance. The GitHub R package for this work is available at https://github.com/anjalisilva/mixMPLNFA and is released under the open-source MIT license.},
  pubstate    = {prepublished},
  keywords    = {/unread,Statistics - Computation,Statistics - Machine Learning,Statistics - Methodology},
  annotation  = {Read\_Status: New\\
                 Read\_Status\_Date: 2025-07-02T09:31:47.579Z},
  file        = {/home/louis/snap/zotero-snap/common/Zotero/storage/BXVPEIDD/Payne et al. - 2023 - Finite Mixtures of Multivariate Poisson-Log Normal Factor Analyzers for Clustering Count Data.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/L5DAS5C2/2311.html}
 }
@unpublished{bystrovaCausalDiscovery,
  title      = {Causal Discovery},
  author     = {Bystrova, Daria},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-07-02T09:34:39.476Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/NQE5DY92/Bystrova - Causal discovery.pdf}
 }
@online{braultFastConsistentAlgorithm2023,
  title       = {Fast and {{Consistent Algorithm}} for the {{Latent Block Model}}},
  author      = {Brault, Vincent and Channarond, Antoine},
  date        = {2023-03-09},
  eprint      = {1610.09005},
  eprinttype  = {arXiv},
  eprintclass = {math},
  doi         = {10.48550/arXiv.1610.09005},
  url         = {http://arxiv.org/abs/1610.09005},
  urldate     = {2025-07-09},
  abstract    = {The latent block model is used to simultaneously rank the rows and columns of a matrix to reveal a block structure. The algorithms used for estimation are often time consuming. However, recent work shows that the log-likelihood ratios are equivalent under the complete and observed (with unknown labels) models and the groups posterior distribution to converge as the size of the data increases to a Dirac mass located at the actual groups configuration. Based on these observations, the algorithm Largest Gaps is proposed in this paper to perform clustering using only the marginals of the matrix, when the number of blocks is very small with respect to the size of the whole matrix in the case of binary data. In addition, a model selection method is incorporated with a proof of its consistency. Thus, this paper shows that studying simplistic configurations (few blocks compared to the size of the matrix or very contrasting blocks) with complex algorithms is useless since the marginals already give very good parameter and classification estimates.},
  langid      = {english},
  pubstate    = {prepublished},
  keywords    = {/unread,Mathematics - Statistics Theory,Statistics - Computation,Statistics - Statistics Theory},
  annotation  = {Read\_Status: New\\
                 Read\_Status\_Date: 2025-07-09T13:58:53.533Z},
  file        = {/home/louis/snap/zotero-snap/common/Zotero/storage/5LXC6Y68/Brault et Channarond - 2023 - Fast and Consistent Algorithm for the Latent Block Model.pdf}
 }
@article{braultGeneralisationLalgorithmeLargest,
  title      = {Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique},
  author     = {Brault, Vincent and Channarond, Antoine and Robert, Valérie},
  abstract   = {The latent block model assumes there exists a distribution for each crossing between an object cluster and a variable cluster of a data table ; the cells are supposed to be independent conditionally to the choice of these clusters. To estimate the model parameters, most of algorithms are time consuming. Brault and Channarond (2016) proposed to adapt the Largest Gaps algorithm which consists in using the margins. They thus obtained a procedure which estimates all the model parameters consistently but requires a large number of observations. In this talk, we will extend the procedure to the case of any distribution having a second order moment by using an EM algorithm estimation.},
  langid     = {french},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-07-09T12:29:43.098Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/GIUNC4L3/Brault et al. - Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique.pdf}
 }
@article{channarondClassificationEstimationStochastic2012,
  title        = {Classification and Estimation in the {{Stochastic Blockmodel}} Based on the Empirical Degrees},
  author       = {Channarond, Antoine and Daudin, Jean-Jacques and Robin, Stéphane},
  date         = {2012-01-01},
  journaltitle = {Electronic Journal of Statistics},
  shortjournal = {Electron. J. Statist.},
  volume       = {6},
  publisher    = {Institute of Mathematical Statistics},
  issn         = {1935-7524},
  doi          = {10.1214/12-ejs753},
  url          = {https://projecteuclid.org/journals/electronic-journal-of-statistics/volume-6/issue-none/Classification-and-estimation-in-the-Stochastic-Blockmodel-based-on-the/10.1214/12-EJS753.full},
  urldate      = {2025-07-09},
  abstract     = {The Stochastic Blockmodel [16] is a mixture model for heterogeneous network data. Unlike the usual statistical framework, new nodes give additional information about the previous ones in this model. Thereby the distribution of the degrees concentrates in points conditionally on the node class. We show under a mild assumption that classification, estimation and model selection can actually be achieved with no more than the empirical degree data. We provide an algorithm able to process very large networks and consistent estimators based on it. In particular, we prove a bound of the probability of misclassification of at least one node, including when the number of classes grows.},
  issue        = {none},
  langid       = {english},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-07-09T13:59:33.921Z},
  file         = {/home/louis/snap/zotero-snap/common/Zotero/storage/8TL8AJ2G/Channarond et al. - 2012 - Classification and estimation in the Stochastic Blockmodel based on the empirical degrees.pdf}
 }
--- a/suivi/2026-12/2026-12.qmd
+++ b/suivi/2026-12/2026-12.qmd
@ -0,0 +1,165 @@
 ---
 title: "Bilan semaine 12 2026 : 16 mars - 20 mars"
 categories: [colBiSBM, inférence, GNN]
 date: 2026 03 16
 date-modified: last-modified
 bibliography: references.bib
 # from: markdown+latex_macros
 ---
 {{< include /_macros.tex >}}
 ## TODO List
 ### Mes priorités de la semaine
 - Faire tourner clustering colBiSBM sur les clusters dégagés par Mona et l'accompagner sur la rédaction de son poster
 - Préparer ma présentation (voir le bloc ci-après) pour Rochebrune et donner un titre : 
    - "Comparing networks, a challenging task?" (NUL/20)
    - "High and low: comparing networks, ~~a burden for the mind~~ what works and what doesn't(?)"
 - Finir implémentation dans sbm de blockmodels avec covariables sur les noeuds
 - Bricoler une pipeline sbm |> nnet::multinom comme performance de référence pour l'introduction de covariables
 - Comprendre pourquoi l'idée géniale de Sophie est remise en question
 - Corriger les copies des 1As
 :::{.callout-note title="Idées présentation Rochebrune"}
 - colBiSBM: ce qui marche (et à la fin la galère du clustering)
 - Transition sur les OTUs et motivations de pourquoi c'est galère (#OTU>>#Sample, dépendance par la phylogénie ...)
 - Motivation du co-clustering (LBM), trouver des groupes d'échantillons et d'OTUs qui exhibent des comportements différents (pathologies, sols particuliers, échantillon alimentaire avec une flore d'intérêt ...)
 - Première idée: LBM séquentiel, faire repartir des $\tau^{l}$ pour initialiser les $\tau^{l+1}$ selon l'arbre phylogénétique
    - Idées: faciliter l'exploration du paysage de l'ELBO en se plaçant dans une région de paramètres qui a du sens phylo et donc en sortir indiquerait un signal dans les données.
    - Problèmes: ne résout pas le problème de la dimensionnalité en les OTUs
    - Comme implém., performances pas incroyables et signal peu clair.
 - Deuxième idée: SBM et LBM avec covariables sur les noeuds. Reconstruire des positions dans un espace phylogénétique à partir des matrices de distances phylogénétiques (et donc en accord avec l'arbre). Mais aussi modèle plus large pour prendre en compte diverses situations (trouver des exemples d'autres données?).
    - Idées: possible de former les groupes a priori selon les tendances dans les covariables (reflet de la phylogénie) et mettre à jour selon les données
    - Quasiment implémenté dans `{blockmodels}` et dans `{sbm}` (j'aimerais pouvoir dire le jour de ma présentation que c'est dispo sur la version de développement, il va falloir charbonner de mon côté).
    - Théoriquement: on a l'identifiabilité (ou pas vu la pratique?)
    - Problèmes: Ne résout pas les problèmes de calculs, en pratique on ne retrouve pas les bons coefficients (label-switching?)
 - Troisième idée: Utiliser la structure de l'arbre phylogénétique pour encoder une relation des positions latentes dans un *Latent Position Model* (LPM).
    - Détail: pour chaque couche $l$, chaque individu $i$ de la couche et son ancêtre $j=Ancestor(i)$ (CITER LES PAPIERS A L'ORIGINE DE L'IDEE), on écrit $\gamma_{l,i} = \gamma_{l-1,j=Ancestor(i)} + \delta_{l,i} = \gamma_{0} + \sum_{k\in Ancestry(i)} \delta_{l,k}$ (puisqu'on a un unique ancêtre dans chaque couche $l$), les noeuds qui partagent un ancêtre commun
    partagent la position latente à ce niveau et lui ajoutent un décalage $\delta$.
    - Remarque: Peut-être possible de gérer les cas de transferts horizontaux en élargissant le concept de lignée ancestrale?
    - Idée: (casquette de biologiste) possible d'avoir dans une même lignée phylogénétique des individus qui se spécialisent voire qui développent une convergence évolutive et acquièrent des traits phénotypiques qui ressemblent à d'autres familles.
    Dans ce cas, le $\delta_{l,i}$ rapproche le $\gamma_{l,i}$ d'un $\gamma_{l, i^{\prime}}$ qui a le trait commun.
    - Problème: les calculs seraient-ils simplifiés? pas sûr. Et je n'y ai pas encore touché.
 - Utiliser les Hierarchical SBM et LBM de Peixoto dans son package `graphtools` pour initialiser l'arbre liant les couches avec l'arbre phylogénétique.
 :::
 ### Les autres tâches
 - Petites opérations sur les OTUs (regarder la matrice dans les yeux):
    - Ranger les OTUs par variances (i.e. `sd(OTU_j)`)
        - HMC sur-dispersés (au-dessus bissectrice)
        - Enterotype phyloseq sous-disp
    - Regarder la proportion de 1. taxons rares, 2. zéros.
    - Faire des coupures selon niveaux taxonomiques et regarder si $\Var_{\text{intra}} \approx \Var_{\text{inter}}$
    - *Bonus*: faire ça dans qmd et voir si forge permet gitlab pages
 - Relire @peixotoHierarchicalBlockStructures2014
    - Regarder les gens qui citent les travaux de Peixoto
    - Utiliser graphtools en initialisant la recherche Nested avec le partitionnement donné par l'arbre phylogénétique.
 :::{.callout-note title="Idées"}
 - Trouver manière de faire un compromis : $\ell(Y,Z,W;\theta) - \lambda d(C(W),C_0)$ avec $C(W)$ le clustering seulement sur la base de la structure LBM et $C_0$ le clustering de l'arbre. Problème $d$ est une distance entre partitions, comment optimiser dessus ?
 - ⌛ Mise à jour partielle des $\tau$ : ce qui pose soucis c'est les gros calculs matriciels (c'est vraiment vrai?). Donc sorte de "stochastic" VEM où on update seulement une partie des $\tau$ à chaque itération. Et échantillonnage stratifié selon l'arbre ?
    - ⌛ Simulations avec $n_2$ croissant lancées sur Migale
    - Réimplémenté VE Bernoulli dans colSBM pour Bipartite et début implémentation Stochastic VE. En fait le problème vient des calculs matriciels $Y\times(\tau^{(1)})^{\top}$ ($n_2^2$) donc besoin de sous-échantillonner les noeuds de l'autre dimension à mettre à jour.
 :::
 - Clustering unipartite j'ai cassé une fonction de distance à vérifier et réparer
 - Codes pour le papier :
    - Nettoyer les scripts
    - Faire un joli README
    - ❓Faire des notebooks
 - Réussir à reproduire résultat de @abramovStructureKnowsBest
 - Maitriser graphtools de Peixoto pour essayer d'utiliser l'arbre taxonomique sur graphe de cooccurrence inféré par SparCC
 - Maitriser SparCC
 - 👶 (délégué à Mona) Clustering sur Doré :
    - Ajouter Chao1 et 2, colonne par colonne (site par site), et faire indice moyen et la variance.
 ### Inférence et microbes
 - ⌛ (En cours) Possible en modifiant lbm.h et sbm.h d'obtenir un modèle utilisant les covariables de groupes (de blocs ?).
 Car besoin de changer `membership.m_step()` pour mettre à jour $\pmb\pi$ et $\pmb{\rho}$ en utilisant les $\pmb B^{\top}\pmb X$
 et en renvoyant l'ELBO adaptée.
    - 😄 Avantage s'inscrit directement dans blockmodels et permet d'avoir toutes les lois d'émissions déjà codées et compatibles !
    - 😢 Besoin de réfléchir à une bonne implémentation.
 J'ai codé l'optimisation et les transferts mais il faut que je vérifie que tout fonctionne
 - ✅ Appliqué multipartite sur $\forall i, OTU_i \times Sample$:
 ![Le plot des groupes trouvés par le multipartite (2 pour tous les OTUs et 4 pour les échantillons.)](figs/Multipartite.svg)
 #### Bibliographie: à lire, à faire
 - Lire article multi-niveaux Saint-Clair
 - 🆕 🔎 Trouver des papiers: 
    - LBM Negative Binomial
    - Network inference through sample comparison
 - Idée des groupes sur la base de distance phylogénétique:
    - En train de comprendre les distances que phyloseq permet de calculer sur notre exemple
    - En train de lire sur Principal coordinate analysis : https://openplantpathology.github.io/OPP_Workshop_Multivariate/2-MV_PCO.html
    - Parametric t-SNE pour avoir une unique représentation latente (inconvénient utilise du Deep Learning)
    - Lire Papier UniFrac
 #### Écrire et faire tourner
 - 🆕 SparCC à différents niveaux
 - 🆕⌛ Tree-PLN à différents niveaux
 #### Causalité
 Plus sur le temps long, à regarder
 - GT causalité
 - Daria Bystrova lire présentation @bystrovaCausalDiscovery (Meek rules, V-structure)
 ## A discuter
 - 🆕 Voir pour des Réseaux / GDR où aller
 - 🆕 Chercher des cours à suivre
 ## Biblio à faire
 - Regarder Transport optimal graphes bipartites.
 ## Lectures en cours 📚
 ### HDR Vincent Brault
 - ⌛ Chap 2 : Creuser l'idée de maximiser l'énergie libre, très intéressant regarder le critère CARI et lire Robert et al 2021. Actuellement p32 du manuscrit
 - Chap 3
 ### OT
 - ⌛ @mazeletUnsupervisedLearningOptimal Intéressant pour le transport optimal entre graphes de tailles différentes | Regarder si la régularisation entropique ne marche pas bien pour le graphe.
 - ⌛ @nennaLecture2Entropic Pour comprendre le problème d'OT régularisé pour l'entropie.
 - ⌛ @nennaLecture1Monge
 ### Inférence de graphes
 - ⌛ @aitchisonStatisticalAnalysisCompositional1982a, en cours
 - ❗📖 @payneFiniteMixturesMultivariate2023 sur MixMPLN
 ### Causalité
 - ❗📖 @bystrovaCausalDiscovery
 ### Largest Gaps
 - ❗📖 @braultFastConsistentAlgorithm2023
 - ❗📖 @channarondClassificationEstimationStochastic2012 le papier qui introduit le *Largest Gaps*
--- a/suivi/2026-12/references.bib
+++ b/suivi/2026-12/references.bib
@ -0,0 +1,176 @@
@article{mazeletUnsupervisedLearningOptimal,
  title      = {Unsupervised {{Learning}} for {{Optimal Transport}} Plan Prediction between Unbalanced Graphs},
  author     = {Mazelet, Sonia and Flamary, Rémi and Thirion, Bertrand},
  abstract   = {Optimal transport between graphs, based on Gromov-Wasserstein and other extensions, is a powerful tool for comparing and aligning graph structures. However, solving the associated non-convex optimization problems is computationally expensive, which limits the scalability of these methods to large graphs. In this work, we present Unbalanced Learning of Optimal Transport (ULOT), a deep learning method that predicts optimal transport plans between two graphs. Our method is trained by minimizing the fused unbalanced Gromov-Wasserstein (FUGW) loss. We propose a novel neural architecture with cross-attention that is conditioned on the FUGW tradeoff hyperparameters. We evaluate ULOT on synthetic stochastic block model (SBM) graphs and on real cortical surface data obtained from fMRI. ULOT predicts transport plans with competitive loss up to two orders of magnitude faster than classical solvers. Furthermore, the predicted plan can be used as a warm start for classical solvers to accelerate their convergence. Finally, the predicted transport plan is fully differentiable with respect to the graph inputs and FUGW hyperparameters, enabling the optimization of functionals of the ULOT plan.},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-11T09:08:09.864Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/HPZEYMM9/Mazelet et al. - Unsupervised Learning for Optimal Transport plan prediction between unbalanced graphs.pdf}
 }
@article{peixotoHierarchicalBlockStructures2014,
  title      = {Hierarchical {{Block Structures}} and {{High-Resolution Model Selection}} in {{Large Networks}}},
  author     = {Peixoto, Tiago P.},
  year       = 2014,
  month      = mar,
  journal    = {Physical Review X},
  volume     = {4},
  number     = {1},
  pages      = {011047},
  issn       = {2160-3308},
  doi        = {10.1103/PhysRevX.4.011047},
  urldate    = {2025-09-26},
  copyright  = {http://creativecommons.org/licenses/by/3.0/},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-09-26T08:27:38.586Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/R58ZQK8J/Peixoto - 2014 - Hierarchical Block Structures and High-Resolution Model Selection in Large Networks.pdf}
 }
@article{abramovStructureKnowsBest,
  title      = {Structure Knows Best: Predicting Ecological Interactions across Space through Pairwise Integration of Latent Network Patterns},
  author     = {Abramov, Kesem and Biton, Barry and Galai, Geut and Puzis, Rami and Pilosof, Shai},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-12-01T08:50:29.812Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/DX44VXQA/Abramov et al. - Structure knows best predicting ecological interactions across space through pairwise integration o.pdf}
 }
@article{nennaLecture2Entropic,
  title      = {Lecture 2: {{Entropic Optimal Transport}}},
  author     = {Nenna, Luca},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-11T16:06:28.547Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/WGFIISDB/Nenna - Lecture 2 Entropic Optimal Transport.pdf}
 }
@article{nennaLecture1Monge,
  title      = {Lecture 1 {{Monge}} and {{Kantorovich}} Problems: From Primal to Dual},
  author     = {Nenna, Luca},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-13T09:24:13.832Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/7LVQPD6D/Nenna - Lecture 1 Monge and Kantorovich problems from primal to dual.pdf}
 }
@article{Morton2021.11.09.467939,
  title        = {Scalable Estimation of Microbial Co-Occurrence Networks with {{Variational Autoencoders}}},
  author       = {Morton, James T. and Silverman, Justin and Tikhonov, Gleb and Lähdesmäki, Harri and Bonneau, Rich},
  date         = {2021},
  journaltitle = {bioRxiv : the preprint server for biology},
  shortjournal = {bioRxiv},
  eprint       = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939.full.pdf},
  publisher    = {Cold Spring Harbor Laboratory},
  doi          = {10.1101/2021.11.09.467939},
  url          = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939},
  abstract     = {Estimating microbe-microbe interactions is critical for understanding the ecological laws governing microbial communities. Rapidly decreasing sequencing costs have promised new opportunities to estimate microbe-microbe interactions across thousands of uncultured, unknown microbes. However, typical microbiome datasets are very high dimensional and accurate estimation of microbial correlations requires tens of thousands of samples, exceeding the computational capabilities of existing methodologies. Furthermore, the vast majority of microbiome studies collect compositional metagenomics data which enforces a negative bias when computing microbe-microbe correlations. The Multinomial Logistic Normal (MLN) distribution has been shown to be effective at inferring microbe-microbe correlations, however scalable Bayesian inference of these distributions has remained elusive. Here, we show that carefully constructed Variational Autoencoders (VAEs) augmented with the Isometric Log-ratio (ILR) transform can estimate low-rank MLN distributions thousands of times faster than existing methods. These VAEs can be trained on tens of thousands of samples, enabling co-occurrence inference across tens of thousands of microbes without regularization. The latent embedding distances computed from these VAEs are competitive with existing beta-diversity methods across a variety of mouse and human microbiome classification and regression tasks, with notable improvements on longitudinal studies. Competing Interest Statement: The authors have declared no competing interest.},
  elocation-id = {2021.11.09.467939},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-06-30T14:17:29.518Z}
 }
@article{aitchisonStatisticalAnalysisCompositional1982a,
  title        = {The {{Statistical Analysis}} of {{Compositional Data}}},
  author       = {Aitchison, J.},
  date         = {1982},
  journaltitle = {Journal of the Royal Statistical Society. Series B (Methodological)},
  volume       = {44},
  number       = {2},
  eprint       = {2345821},
  eprinttype   = {jstor},
  pages        = {139--177},
  publisher    = {[Royal Statistical Society, Oxford University Press]},
  issn         = {0035-9246},
  url          = {https://www.jstor.org/stable/2345821},
  urldate      = {2025-05-07},
  abstract     = {The simplex plays an important role as sample space in many practical situations where compositional data, in the form of proportions of some whole, require interpretation. It is argued that the statistical analysis of such data has proved difficult because of a lack both of concepts of independence and of rich enough parametric classes of distributions in the simplex. A variety of independence hypotheses are introduced and interrelated, and new classes of transformed-normal distributions in the simplex are provided as models within which the independence hypotheses can be tested through standard theory of parametric hypothesis testing. The new concepts and statistical methodology are illustrated by a number of applications.},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-05-07T07:43:38.485Z},
  file         = {/home/louis/snap/zotero-snap/common/Zotero/storage/S97URH4Y/Aitchison - 1982 - The Statistical Analysis of Compositional Data.pdf}
 }
@online{payneFiniteMixturesMultivariate2023,
  title       = {Finite {{Mixtures}} of {{Multivariate Poisson-Log Normal Factor Analyzers}} for {{Clustering Count Data}}},
  author      = {Payne, Andrea and Silva, Anjali and Rothstein, Steven J. and McNicholas, Paul D. and Subedi, Sanjeena},
  date        = {2023-11-13},
  eprint      = {2311.07762},
  eprinttype  = {arXiv},
  eprintclass = {stat},
  doi         = {10.48550/arXiv.2311.07762},
  url         = {http://arxiv.org/abs/2311.07762},
  urldate     = {2025-07-02},
  abstract    = {A mixture of multivariate Poisson-log normal factor analyzers is introduced by imposing constraints on the covariance matrix, which resulted in flexible models for clustering purposes. In particular, a class of eight parsimonious mixture models based on the mixtures of factor analyzers model are introduced. Variational Gaussian approximation is used for parameter estimation, and information criteria are used for model selection. The proposed models are explored in the context of clustering discrete data arising from RNA sequencing studies. Using real and simulated data, the models are shown to give favourable clustering performance. The GitHub R package for this work is available at https://github.com/anjalisilva/mixMPLNFA and is released under the open-source MIT license.},
  pubstate    = {prepublished},
  keywords    = {/unread,Statistics - Computation,Statistics - Machine Learning,Statistics - Methodology},
  annotation  = {Read\_Status: New\\
                 Read\_Status\_Date: 2025-07-02T09:31:47.579Z},
  file        = {/home/louis/snap/zotero-snap/common/Zotero/storage/BXVPEIDD/Payne et al. - 2023 - Finite Mixtures of Multivariate Poisson-Log Normal Factor Analyzers for Clustering Count Data.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/L5DAS5C2/2311.html}
 }
@unpublished{bystrovaCausalDiscovery,
  title      = {Causal Discovery},
  author     = {Bystrova, Daria},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-07-02T09:34:39.476Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/NQE5DY92/Bystrova - Causal discovery.pdf}
 }
@online{braultFastConsistentAlgorithm2023,
  title       = {Fast and {{Consistent Algorithm}} for the {{Latent Block Model}}},
  author      = {Brault, Vincent and Channarond, Antoine},
  date        = {2023-03-09},
  eprint      = {1610.09005},
  eprinttype  = {arXiv},
  eprintclass = {math},
  doi         = {10.48550/arXiv.1610.09005},
  url         = {http://arxiv.org/abs/1610.09005},
  urldate     = {2025-07-09},
  abstract    = {The latent block model is used to simultaneously rank the rows and columns of a matrix to reveal a block structure. The algorithms used for estimation are often time consuming. However, recent work shows that the log-likelihood ratios are equivalent under the complete and observed (with unknown labels) models and the groups posterior distribution to converge as the size of the data increases to a Dirac mass located at the actual groups configuration. Based on these observations, the algorithm Largest Gaps is proposed in this paper to perform clustering using only the marginals of the matrix, when the number of blocks is very small with respect to the size of the whole matrix in the case of binary data. In addition, a model selection method is incorporated with a proof of its consistency. Thus, this paper shows that studying simplistic configurations (few blocks compared to the size of the matrix or very contrasting blocks) with complex algorithms is useless since the marginals already give very good parameter and classification estimates.},
  langid      = {english},
  pubstate    = {prepublished},
  keywords    = {/unread,Mathematics - Statistics Theory,Statistics - Computation,Statistics - Statistics Theory},
  annotation  = {Read\_Status: New\\
                 Read\_Status\_Date: 2025-07-09T13:58:53.533Z},
  file        = {/home/louis/snap/zotero-snap/common/Zotero/storage/5LXC6Y68/Brault et Channarond - 2023 - Fast and Consistent Algorithm for the Latent Block Model.pdf}
 }
@article{braultGeneralisationLalgorithmeLargest,
  title      = {Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique},
  author     = {Brault, Vincent and Channarond, Antoine and Robert, Valérie},
  abstract   = {The latent block model assumes there exists a distribution for each crossing between an object cluster and a variable cluster of a data table ; the cells are supposed to be independent conditionally to the choice of these clusters. To estimate the model parameters, most of algorithms are time consuming. Brault and Channarond (2016) proposed to adapt the Largest Gaps algorithm which consists in using the margins. They thus obtained a procedure which estimates all the model parameters consistently but requires a large number of observations. In this talk, we will extend the procedure to the case of any distribution having a second order moment by using an EM algorithm estimation.},
  langid     = {french},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-07-09T12:29:43.098Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/GIUNC4L3/Brault et al. - Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique.pdf}
 }
@article{channarondClassificationEstimationStochastic2012,
  title        = {Classification and Estimation in the {{Stochastic Blockmodel}} Based on the Empirical Degrees},
  author       = {Channarond, Antoine and Daudin, Jean-Jacques and Robin, Stéphane},
  date         = {2012-01-01},
  journaltitle = {Electronic Journal of Statistics},
  shortjournal = {Electron. J. Statist.},
  volume       = {6},
  publisher    = {Institute of Mathematical Statistics},
  issn         = {1935-7524},
  doi          = {10.1214/12-ejs753},
  url          = {https://projecteuclid.org/journals/electronic-journal-of-statistics/volume-6/issue-none/Classification-and-estimation-in-the-Stochastic-Blockmodel-based-on-the/10.1214/12-EJS753.full},
  urldate      = {2025-07-09},
  abstract     = {The Stochastic Blockmodel [16] is a mixture model for heterogeneous network data. Unlike the usual statistical framework, new nodes give additional information about the previous ones in this model. Thereby the distribution of the degrees concentrates in points conditionally on the node class. We show under a mild assumption that classification, estimation and model selection can actually be achieved with no more than the empirical degree data. We provide an algorithm able to process very large networks and consistent estimators based on it. In particular, we prove a bound of the probability of misclassification of at least one node, including when the number of classes grows.},
  issue        = {none},
  langid       = {english},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-07-09T13:59:33.921Z},
  file         = {/home/louis/snap/zotero-snap/common/Zotero/storage/8TL8AJ2G/Channarond et al. - 2012 - Classification and estimation in the Stochastic Blockmodel based on the empirical degrees.pdf}
 }
--- a/suivi/2026-6/2026-6.qmd
+++ b/suivi/2026-6/2026-6.qmd
@ -0,0 +1,238 @@
 ---
 title: "Bilan semaine 6 2026 : 02 février - 06 février"
 categories: [colBiSBM, inférence, GNN]
 date: 2026 02 06
 date-modified: last-modified
 bibliography: references.bib
 # from: markdown+latex_macros
 ---
 {{< include /_macros.tex >}}
 ## TODO List
 - Petites opérations sur les OTUs (regarder la matrice dans les yeux):
    - Ranger les OTUs par variances (i.e. `sd(OTU_j)`)
        - HMC sur-dispersés (au-dessus bissectrice)
        - Enterotype phyloseq sous-disp
    - Regarder la proportion de 1. taxons rares, 2. zéros.
    - Faire des coupures selon niveaux taxonomiques et regarder si $\Var_{\text{intra}} \approx \Var_{\text{inter}}$
    - *Bonus*: faire ça dans qmd et voir si forge permet gitlab pages
 - ✅ Faire tourner un LBM sur Human Gut et voir si ça plante : **ça plante, la RAM est surchargée.**
    - TODO Faire LBM sur niveau taxonomique grossier, initialiser avec le résultat pour un niveau plus fin et ainsi de suite.
 - ✅ Avec blockmodels, codé un LBM-Séquentiel. *Des différences contrastées...*
 - ⌛ Prendre jeu de données exemple de phyloseq : 
    - ✅ 😞 enterotype tourne mais ne donne pas de bons résultats (il semble y avoir deux blocs d'échantillons, mais le modèle ne les voit pas).
    - ✅ les jeux de données de Mahendra ne tournent pas (phase forward interminable).
 - Relire @peixotoHierarchicalBlockStructures2014
    - Regarder les gens qui citent les travaux de Peixoto
 - Implémentation `blockmodels` LBM avec covariables sur proportions (voir @eq-modele-covar-prop)
 :::{.callout-note title="Idées"}
 - Travailler sur Fungus Tree network
 - 🔍**Demander à PB et SD** : Comparaison covar prop avec GREMLINS multipartite sur (log(dist_phylo), fungus-tree)
 - Trouver manière de faire un compromis : $\ell(Y,Z,W;\theta) - \lambda d(C(W),C_0)$ avec $C(W)$ le clustering seulement sur la base de la structure LBM et $C_0$ le clustering de l'arbre. Problème $d$ est une distance entre partition, comment optimiser dessus ?
 - ⌛ Mise à jour partielle des $\tau$ : ce qui pose soucis c'est les gros calculs matriciels (c'est vraiment vrai?). Donc sorte de "stochastic" VEM où on update seulement une partie des $\tau$ à chaque itération. Et échantillonnage stratifié selon l'arbre ?
    - ⌛ Simulations avec $n_2$ croissant lancées sur Migale
    - Réimplémenté VE Bernoulli dans colSBM pour Bipartite et début d'implémentation du Stochastic VE. En fait le problème vient des calculs matriciels $Y\times(\tau^{(1)})^{\top}$ ($n_2^2$), d'où le besoin de sous-échantillonner les noeuds de l'autre dimension à mettre à jour.
 :::
 - Clustering unipartite j'ai cassé une fonction de distance à vérifier et réparer
 - Codes pour le papier :
    - Nettoyer les scripts
    - Faire un joli README
    - ❓Faire des notebooks
 - Réussir à reproduire résultat de @abramovStructureKnowsBest
 - Maitriser graph-tool de Peixoto pour essayer d'utiliser l'arbre taxonomique sur le graphe de cooccurrence inféré par SparCC
 - Maitriser SparCC
 - 👶 (délégué à Mona) Clustering sur Doré :
    - Ajouter Chao1 et 2, colonne par colonne (site par site), et faire indice moyen et la variance.
 ### Inférence et microbes
 - ✅ En préparation d'un fichier (réu avec JBL à 10h45 le 06/02/2026).
 Possible en modifiant lbm.h et sbm.h d'obtenir un modèle utilisant les covariables de groupes (de blocs ?).
 Car besoin de changer `membership.m_step()` pour mettre à jour $\pmb\pi$ et $\pmb{\rho}$ en utilisant les $\pmb B^{\top}\pmb X$
 et en renvoyant l'ELBO adaptée.
    - 😄 Avantage s'inscrit directement dans blockmodels et permet d'avoir toutes les lois d'émissions déjà codées et compatibles !
    - 😢 Besoin de réfléchir à une bonne implémentation.
 #### Modèle avec covariables sur probas d'appartenances aux groupes
 \begin{align*}
 \pmb{\beta}_{r}& = \begin{pmatrix}
    \beta_{r,0}\\
    \vdots\\
    \beta_{r,p}
 \end{pmatrix}, & X_{:,j} = \begin{pmatrix}
    1\\
    x_{1}\\
    \vdots\\
    x_p
 \end{pmatrix}\\
 \pmb{\beta}_r^{\top} X_{:,j}& = \beta_{r,0} + \beta_{r,1} x_{1} + \dots + \beta_{r,p} x_p & \approx \log(\rho_r^j) \\
 \pmb{B} & = \begin{pmatrix}
 \pmb{\beta}_1 \dots \pmb{\beta}_r \dots \pmb{\beta}_R
 \end{pmatrix} & \pmb{B}^{\top} X_{:,j} \approx \log(\pmb{\rho}^j) \\
 \pmb{B}^{\top} \pmb{X} & \approx \log((\pmb{\rho}^j)_{j=1,\dots,n_2}) = \log(\pmb{\Rho})\\
 \end{align*}
 Et pour les probas en lignes du LBM
 \begin{align*}
 \pmb{\gamma}_{q}& = \begin{pmatrix}
    \gamma_{q,0}\\
    \vdots\\
    \gamma_{q,d}
 \end{pmatrix}, & V_{:,i} = \begin{pmatrix}
    1\\
    v_{1}\\
    \vdots\\
    v_d
 \end{pmatrix}\\
 \pmb{\gamma}_q^{\top} V_{:,i}& = \gamma_{q,0} + \gamma_{q,1} v_{1} + \dots + \gamma_{q,d} v_d & \approx \log(\pi_q^i) \\
 \pmb{\Gamma} & = \begin{pmatrix}
 \pmb{\gamma}_1 \dots \pmb{\gamma}_q \dots \pmb{\gamma}_Q
 \end{pmatrix} & \pmb{\Gamma}^{\top} V_{:,i} \approx \log(\pmb{\pi}^i) \\
 \pmb{\Gamma}^{\top} \pmb{V} & \approx \log((\pmb{\pi}^i)_{i=1,\dots,n_1}) = \log(\pmb{\Pi})
 \end{align*}
 #### Note sur l'identifiabilité (par JBL)
 Soient $X : (p+1, n_2), B : (p+1, R)$ avec $X$ de plein rang, i.e., $rg(X) = p+1\implies XX^{\top}$ est inversible.
 On veut qu'il existe $B^{\prime}$ et $B$ avec $B_{:,R} = \vec 0_{p+1}$, par les propriétés de la fonction softmax, $\sigma(.)$ :
 \begin{align*}
 & \sigma(B^{\top}X) = \sigma({B^{\prime}}^{\top}X)\\
 & \iff \exists C \in \mathbb{R}^{n_2}, B^{\top} X = {B^{\prime}}^{\top} X + \pmb{1}_R C^{\top}\\
 & \iff \exists C \in \mathbb{R}^{n_2}, B^{\top} X - \pmb{1}_R C^{\top} = {B^{\prime}}^{\top} X\\
 & \iff \exists C \in \mathbb{R}^{n_2}, (B^{\top} X - \pmb{1}_R C^{\top}) X^{\top} = {B^{\prime}}^{\top} X X^{\top}\\
 & \iff \exists C \in \mathbb{R}^{n_2}, (B^{\top} X - \pmb{1}_R C^{\top}) X^{\top}(X X^{\top})^{-1} = {B^{\prime}}^{\top}\\
 \end{align*}
 #### Description du modèle hiérarchique
 Toujours modèle LBM mais avec probas d'appartenance pour les colonnes variables:
 \begin{align*}
 Z_i &\sim \mathcal{M}(1; \pi_1, \dots, \pi_Q), \sum_{q=1}^{Q} \pi_q = 1\\
 W_j &\sim \mathcal{M}(1; \rho_1^j, \dots, \rho_R^j), \sum_{r=1}^{R} \rho_r^j = 1\\
 Y_{i,j}&\mid Z_i = q, W_j = r \sim \mathcal{F}(\alpha_{qr})
 \end{align*}
 Inférence variationnelle donc $\ell(Y;\pmb{\theta}) \geq \mathcal{J}(\mathcal{R},\pmb{\theta})$ avec
 $$
 \begin{aligned}
 \ELBORTheta ={}& \sum_{i = 1}^{n_1}\sum_{j=1}^{n_2}\sum_{q \in \mathcal{Q}_1} \sum_{r \in \mathcal{Q}_2} \tau_{iq}^{1} \tau_{jr}^{2} \log f(Y_{ij}; \alpha_{qr})
    + \sum_{i=1}^{n_1} \sum_{q \in \mathcal{Q}_1} \tau_{iq}^{1} \log \pi_{\color{black}q} + \sum_{j=1}^{n_2} \sum_{r \in \mathcal{Q}_2} \tau_{jr}^{2} \log \rho_{\color{black}r}^{j}       \\
    & - \sum_{i=1}^{n_1} \sum_{q \in \mathcal{Q}_1} \tau_{iq}^{1} \log \tau_{iq}^{1} - \sum_{j=1}^{n_2} \sum_{r \in \mathcal{Q}_2} \tau_{jr}^{2} \log \tau_{jr}^{2}
 \end{aligned}
 $$
 ##### Modèle Sophie
 Avec $\rho_r^j = \frac{\exp{\beta_r X_j}}{\sum_{s=1}^{R} \exp{\beta_s X_j}} = \sigma(\pmb{\beta} \pmb{X})_{r,j}$, où $\sigma$ désigne le softmax. Mais il y a besoin de poser une contrainte sur l'un des $(\beta_r)_{r=1,\dots,R}$, ici $\beta_R = 0$.
 La partie pertinente de l'ELBO devient:
 $$
  P((\beta_r)_{r=1,\dots,R}, (X_j)_{j=1,\dots,n_2}, (\tau_{jr})_{\substack{j=1,\dots,n_2\\r=1,\dots,R}} ) =  \sum_{j=1}^{n_2} \sum_{r=1}^{R} [\tau_{jr} (\beta_r X_j - \log (\sum_{s=1}^{R} \exp{\beta_s X_j}))]
 $${#eq-modele-covar-prop}
 Et on obtient la dérivée partielle par rapport à $\beta_t$ comme:
 \begin{align*}
 \dfrac{\partial P}{\partial \beta_t}&((\beta_r)_{r=1,\dots,R}, (X_j)_{j=1,\dots,n_2}, (\tau_{jr})_{\substack{j=1,\dots,n_2\\r=1,\dots,R}} ) = \sum_{j=1}^{n_2} \biggl[ \tau_{jt} X_j - \frac{X_j \exp{\beta_t X_j}}{\sum_{s=1}^{R} \exp{\beta_s X_j}} \biggr]\\
 & = \sum_{j=1}^{n_2} \biggl[\bigl(\tau_{jt} - \sigma(\pmb{\beta} \pmb{X})_{t,j}\bigr) X_j\biggr] = \sum_{j=1}^{n_2} \biggl[\bigl(\tau_{jt} - \rho_t^j \bigr) X_j\biggr] 
 \end{align*}
 #### Bibliographie: à lire, à faire
 - Lire article multi-niveaux Saint-Clair
 - 🆕 🔎 Trouver des papiers: 
    - LBM Negative Binomial
    - Network inference through sample comparison
 - Idée des groupes sur la base de distance phylogénétique:
    - En train de comprendre les distances que phyloseq permet de calculer sur notre exemple
    - En train de lire sur Principal coordinates analysis : https://openplantpathology.github.io/OPP_Workshop_Multivariate/2-MV_PCO.html
    - Parametric t-SNE pour avoir une unique représentation latente (inconvénient utilise du Deep Learning)
    - Lire Papier UniFrac
 #### Réflexion
 - easy16s : se renseigner sur 
    - $\alpha$, $\beta$ diversité
    - Heatmap
 - Regarder **SPARTA** Rennes
 - Ecrire et étudier les modèles pour différents niveaux taxonomiques.
 - 🆕 Regarder NetCoMi
 - 🆕 Regarder OneNet car agrégation plus robuste
 - 🆕 Réfléchir au sens d'agréger les données ou de les diviser 
 #### Écrire et faire tourner
 - Lancer *colBiSBM* sur $OTU\times Sample$ &rarr; problème du chargement en mémoire des données à voir
 - Lancer *colSBM* sur $OTU\times OTU$
 - TabNet pratiquer les [exercices](https://github.com/cregouby/Tutoriel_torch)
 - 🆕 SparCC à différents niveaux
 - 🆕 SBM à différents niveaux
 - 🆕⌛ Tree-PLN à différents niveaux
 #### Causalité
 Plus sur le temps long, à regarder
 - GT causalité
 - Daria Bystrova lire présentation @bystrovaCausalDiscovery (Meek rules, V-structure)
 ## A discuter
 - 🆕 Voir pour des Réseaux / GDR où aller
 - 🆕 Chercher des cours à suivre
 ## Biblio à faire
 - Regarder Transport optimal graphes bipartite.
 ## Lectures en cours 📚
 ### HDR Vincent Brault
 - ⌛ Chap 2 : Creuser l'idée de maximiser l'énergie libre, très intéressant regarder le critère CARI et lire Robert et al 2021. Actuellement p32 du manuscrit
 - Chap 3
 ### OT
 - ⌛ @mazeletUnsupervisedLearningOptimal Intéressant pour le transport optimal entre graphes de tailles différentes | Regarder si la régularisation entropique ne marche pas bien pour le graphe.
 - ⌛ @nennaLecture2Entropic Pour comprendre le problème d'OT régularisé par l'entropie.
 - ⌛ @nennaLecture1Monge
 ### Inférence de graphes
 - ⌛ @aitchisonStatisticalAnalysisCompositional1982a, en cours
 - ❗📖 @payneFiniteMixturesMultivariate2023 sur MixMPLN
 ### Causalité
 - ❗📖 @bystrovaCausalDiscovery
 ### Largest Gaps
 - ❗📖 @braultFastConsistentAlgorithm2023
 - ❗📖 @channarondClassificationEstimationStochastic2012 le papier qui introduit le *Largest Gaps*
--- a/suivi/2026-6/references.bib
+++ b/suivi/2026-6/references.bib
@ -0,0 +1,176 @@
@article{mazeletUnsupervisedLearningOptimal,
  title      = {Unsupervised {{Learning}} for {{Optimal Transport}} Plan Prediction between Unbalanced Graphs},
  author     = {Mazelet, Sonia and Flamary, Rémi and Thirion, Bertrand},
  abstract   = {Optimal transport between graphs, based on Gromov-Wasserstein and other extensions, is a powerful tool for comparing and aligning graph structures. However, solving the associated non-convex optimization problems is computationally expensive, which limits the scalability of these methods to large graphs. In this work, we present Unbalanced Learning of Optimal Transport (ULOT), a deep learning method that predicts optimal transport plans between two graphs. Our method is trained by minimizing the fused unbalanced Gromov-Wasserstein (FUGW) loss. We propose a novel neural architecture with cross-attention that is conditioned on the FUGW tradeoff hyperparameters. We evaluate ULOT on synthetic stochastic block model (SBM) graphs and on real cortical surface data obtained from fMRI. ULOT predicts transport plans with competitive loss up to two orders of magnitude faster than classical solvers. Furthermore, the predicted plan can be used as a warm start for classical solvers to accelerate their convergence. Finally, the predicted transport plan is fully differentiable with respect to the graph inputs and FUGW hyperparameters, enabling the optimization of functionals of the ULOT plan.},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-11T09:08:09.864Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/HPZEYMM9/Mazelet et al. - Unsupervised Learning for Optimal Transport plan prediction between unbalanced graphs.pdf}
 }
@article{peixotoHierarchicalBlockStructures2014,
  title      = {Hierarchical {{Block Structures}} and {{High-Resolution Model Selection}} in {{Large Networks}}},
  author     = {Peixoto, Tiago P.},
  year       = 2014,
  month      = mar,
  journal    = {Physical Review X},
  volume     = {4},
  number     = {1},
  pages      = {011047},
  issn       = {2160-3308},
  doi        = {10.1103/PhysRevX.4.011047},
  urldate    = {2025-09-26},
  copyright  = {http://creativecommons.org/licenses/by/3.0/},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-09-26T08:27:38.586Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/R58ZQK8J/Peixoto - 2014 - Hierarchical Block Structures and High-Resolution Model Selection in Large Networks.pdf}
 }
@article{abramovStructureKnowsBest,
  title      = {Structure Knows Best: Predicting Ecological Interactions across Space through Pairwise Integration of Latent Network Patterns},
  author     = {Abramov, Kesem and Biton, Barry and Galai, Geut and Puzis, Rami and Pilosof, Shai},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-12-01T08:50:29.812Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/DX44VXQA/Abramov et al. - Structure knows best predicting ecological interactions across space through pairwise integration o.pdf}
 }
@article{nennaLecture2Entropic,
  title      = {Lecture 2: {{Entropic Optimal Transport}}},
  author     = {Nenna, Luca},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-11T16:06:28.547Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/WGFIISDB/Nenna - Lecture 2 Entropic Optimal Transport.pdf}
 }
@article{nennaLecture1Monge,
  title      = {Lecture 1 {{Monge}} and {{Kantorovich}} Problems: From Primal to Dual},
  author     = {Nenna, Luca},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-13T09:24:13.832Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/7LVQPD6D/Nenna - Lecture 1 Monge and Kantorovich problems from primal to dual.pdf}
 }
@article{Morton2021.11.09.467939,
  title        = {Scalable Estimation of Microbial Co-Occurrence Networks with {{Variational Autoencoders}}},
  author       = {Morton, James T. and Silverman, Justin and Tikhonov, Gleb and Lähdesmäki, Harri and Bonneau, Rich},
  date         = {2021},
  journaltitle = {bioRxiv : the preprint server for biology},
  shortjournal = {bioRxiv},
  eprint       = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939.full.pdf},
  publisher    = {Cold Spring Harbor Laboratory},
  doi          = {10.1101/2021.11.09.467939},
  url          = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939},
  abstract     = {Estimating microbe-microbe interactions is critical for understanding the ecological laws governing microbial communities. Rapidly decreasing sequencing costs have promised new opportunities to estimate microbe-microbe interactions across thousands of uncultured, unknown microbes. However, typical microbiome datasets are very high dimensional and accurate estimation of microbial correlations requires tens of thousands of samples, exceeding the computational capabilities of existing methodologies. Furthermore, the vast majority of microbiome studies collect compositional metagenomics data which enforces a negative bias when computing microbe-microbe correlations. The Multinomial Logistic Normal (MLN) distribution has been shown to be effective at inferring microbe-microbe correlations, however scalable Bayesian inference of these distributions has remained elusive. Here, we show that carefully constructed Variational Autoencoders (VAEs) augmented with the Isometric Log-ratio (ILR) transform can estimate low-rank MLN distributions thousands of times faster than existing methods. These VAEs can be trained on tens of thousands of samples, enabling co-occurrence inference across tens of thousands of microbes without regularization. The latent embedding distances computed from these VAEs are competitive with existing beta-diversity methods across a variety of mouse and human microbiome classification and regression tasks, with notable improvements on longitudinal studies. Competing Interest Statement: The authors have declared no competing interest.},
  elocation-id = {2021.11.09.467939},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-06-30T14:17:29.518Z}
 }
@article{aitchisonStatisticalAnalysisCompositional1982a,
  title        = {The {{Statistical Analysis}} of {{Compositional Data}}},
  author       = {Aitchison, J.},
  date         = {1982},
  journaltitle = {Journal of the Royal Statistical Society. Series B (Methodological)},
  volume       = {44},
  number       = {2},
  eprint       = {2345821},
  eprinttype   = {jstor},
  pages        = {139--177},
  publisher    = {[Royal Statistical Society, Oxford University Press]},
  issn         = {0035-9246},
  url          = {https://www.jstor.org/stable/2345821},
  urldate      = {2025-05-07},
  abstract     = {The simplex plays an important role as sample space in many practical situations where compositional data, in the form of proportions of some whole, require interpretation. It is argued that the statistical analysis of such data has proved difficult because of a lack both of concepts of independence and of rich enough parametric classes of distributions in the simplex. A variety of independence hypotheses are introduced and interrelated, and new classes of transformed-normal distributions in the simplex are provided as models within which the independence hypotheses can be tested through standard theory of parametric hypothesis testing. The new concepts and statistical methodology are illustrated by a number of applications.},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-05-07T07:43:38.485Z},
  file         = {/home/louis/snap/zotero-snap/common/Zotero/storage/S97URH4Y/Aitchison - 1982 - The Statistical Analysis of Compositional Data.pdf}
 }
@online{payneFiniteMixturesMultivariate2023,
  title       = {Finite {{Mixtures}} of {{Multivariate Poisson-Log Normal Factor Analyzers}} for {{Clustering Count Data}}},
  author      = {Payne, Andrea and Silva, Anjali and Rothstein, Steven J. and McNicholas, Paul D. and Subedi, Sanjeena},
  date        = {2023-11-13},
  eprint      = {2311.07762},
  eprinttype  = {arXiv},
  eprintclass = {stat},
  doi         = {10.48550/arXiv.2311.07762},
  url         = {http://arxiv.org/abs/2311.07762},
  urldate     = {2025-07-02},
  abstract    = {A mixture of multivariate Poisson-log normal factor analyzers is introduced by imposing constraints on the covariance matrix, which resulted in flexible models for clustering purposes. In particular, a class of eight parsimonious mixture models based on the mixtures of factor analyzers model are introduced. Variational Gaussian approximation is used for parameter estimation, and information criteria are used for model selection. The proposed models are explored in the context of clustering discrete data arising from RNA sequencing studies. Using real and simulated data, the models are shown to give favourable clustering performance. The GitHub R package for this work is available at https://github.com/anjalisilva/mixMPLNFA and is released under the open-source MIT license.},
  pubstate    = {prepublished},
  keywords    = {/unread,Statistics - Computation,Statistics - Machine Learning,Statistics - Methodology},
  annotation  = {Read\_Status: New\\
                 Read\_Status\_Date: 2025-07-02T09:31:47.579Z},
  file        = {/home/louis/snap/zotero-snap/common/Zotero/storage/BXVPEIDD/Payne et al. - 2023 - Finite Mixtures of Multivariate Poisson-Log Normal Factor Analyzers for Clustering Count Data.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/L5DAS5C2/2311.html}
 }
@unpublished{bystrovaCausalDiscovery,
  title      = {Causal Discovery},
  author     = {Bystrova, Daria},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-07-02T09:34:39.476Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/NQE5DY92/Bystrova - Causal discovery.pdf}
 }
@online{braultFastConsistentAlgorithm2023,
  title       = {Fast and {{Consistent Algorithm}} for the {{Latent Block Model}}},
  author      = {Brault, Vincent and Channarond, Antoine},
  date        = {2023-03-09},
  eprint      = {1610.09005},
  eprinttype  = {arXiv},
  eprintclass = {math},
  doi         = {10.48550/arXiv.1610.09005},
  url         = {http://arxiv.org/abs/1610.09005},
  urldate     = {2025-07-09},
  abstract    = {The latent block model is used to simultaneously rank the rows and columns of a matrix to reveal a block structure. The algorithms used for estimation are often time consuming. However, recent work shows that the log-likelihood ratios are equivalent under the complete and observed (with unknown labels) models and the groups posterior distribution to converge as the size of the data increases to a Dirac mass located at the actual groups configuration. Based on these observations, the algorithm Largest Gaps is proposed in this paper to perform clustering using only the marginals of the matrix, when the number of blocks is very small with respect to the size of the whole matrix in the case of binary data. In addition, a model selection method is incorporated with a proof of its consistency. Thus, this paper shows that studying simplistic configurations (few blocks compared to the size of the matrix or very contrasting blocks) with complex algorithms is useless since the marginals already give very good parameter and classification estimates.},
  langid      = {english},
  pubstate    = {prepublished},
  keywords    = {/unread,Mathematics - Statistics Theory,Statistics - Computation,Statistics - Statistics Theory},
  annotation  = {Read\_Status: New\\
                 Read\_Status\_Date: 2025-07-09T13:58:53.533Z},
  file        = {/home/louis/snap/zotero-snap/common/Zotero/storage/5LXC6Y68/Brault et Channarond - 2023 - Fast and Consistent Algorithm for the Latent Block Model.pdf}
 }
@article{braultGeneralisationLalgorithmeLargest,
  title      = {Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique},
  author     = {Brault, Vincent and Channarond, Antoine and Robert, Valérie},
  abstract   = {The latent block model assumes there exists a distribution for each crossing between an object cluster and a variable cluster of a data table ; the cells are supposed to be independent conditionally to the choice of these clusters. To estimate the model parameters, most of algorithms are time consuming. Brault and Channarond (2016) proposed to adapt the Largest Gaps algorithm which consists in using the margins. They thus obtained a procedure which estimates all the model parameters consistently but requires a large number of observations. In this talk, we will extend the procedure to the case of any distribution having a second order moment by using an EM algorithm estimation.},
  langid     = {french},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-07-09T12:29:43.098Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/GIUNC4L3/Brault et al. - Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique.pdf}
 }
@article{channarondClassificationEstimationStochastic2012,
  title        = {Classification and Estimation in the {{Stochastic Blockmodel}} Based on the Empirical Degrees},
  author       = {Channarond, Antoine and Daudin, Jean-Jacques and Robin, Stéphane},
  date         = {2012-01-01},
  journaltitle = {Electronic Journal of Statistics},
  shortjournal = {Electron. J. Statist.},
  volume       = {6},
  publisher    = {Institute of Mathematical Statistics},
  issn         = {1935-7524},
  doi          = {10.1214/12-ejs753},
  url          = {https://projecteuclid.org/journals/electronic-journal-of-statistics/volume-6/issue-none/Classification-and-estimation-in-the-Stochastic-Blockmodel-based-on-the/10.1214/12-EJS753.full},
  urldate      = {2025-07-09},
  abstract     = {The Stochastic Blockmodel [16] is a mixture model for heterogeneous network data. Unlike the usual statistical framework, new nodes give additional information about the previous ones in this model. Thereby the distribution of the degrees concentrates in points conditionally on the node class. We show under a mild assumption that classification, estimation and model selection can actually be achieved with no more than the empirical degree data. We provide an algorithm able to process very large networks and consistent estimators based on it. In particular, we prove a bound of the probability of misclassification of at least one node, including when the number of classes grows.},
  issue        = {none},
  langid       = {english},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-07-09T13:59:33.921Z},
  file         = {/home/louis/snap/zotero-snap/common/Zotero/storage/8TL8AJ2G/Channarond et al. - 2012 - Classification and estimation in the Stochastic Blockmodel based on the empirical degrees.pdf}
 }
--- a/suivi/2026-7/2026-7.qmd
+++ b/suivi/2026-7/2026-7.qmd
@ -0,0 +1,226 @@
 ---
 title: "Bilan semaine 7 2026 : 09 février - 13 février"
 categories: [colBiSBM, inférence, GNN, covariables, identifiabilité]
 date: 2026-02-13
 date-modified: last-modified
 bibliography: references.bib
 # from: markdown+latex_macros
 ---
 {{< include /_macros.tex >}}
 ## TODO List
 - Petites opérations sur les OTUs (regarder la matrice dans les yeux):
    - Ranger les OTUs par variances (i.e. `sd(OTU_j)`)
        - HMC sur-dispersés (au-dessus bissectrice)
        - Enterotype phyloseq sous-disp
    - Regarder la proportion de 1. taxons rares, 2. zéros.
    - Faire des coupures selon niveaux taxonomiques et regarder si $\Var_{\text{intra}} \approx \Var_{\text{inter}}$
    - *Bonus*: faire ça dans qmd et voir si forge permet gitlab pages
 - ✅ Avec blockmodels, codé un LBM-Séquentiel. *Des différences contrastées...*
    - TODO Ajouter lien vers notebooks résultats
 - Relire @peixotoHierarchicalBlockStructures2014
    - Regarder les gens qui citent les travaux de Peixoto
 - ⌛ **En cours** Implémentation `blockmodels` LBM avec covariables sur proportions (voir @eq-modele-covar-prop)
 :::{.callout-note title="Idées"}
 - Travailler sur Fungus Tree network
 - ⌛**Demander à PB et SD**, ils regardent : Comparaison covar prop avec GREMLINS multipartite sur (log(dist_phylo), fungus-tree)
 - Trouver manière de faire un compromis : $\ell(Y,Z,W;\theta) - \lambda d(C(W),C_0)$ avec $C(W)$ le clustering seulement sur la base de la structure LBM et $C_0$ le clustering de l'arbre. Problème $d$ est une distance entre partition, comment optimiser dessus ?
 - ⌛ Mise à jour partielle des $\tau$ : ce qui pose souci, ce sont les gros calculs matriciels (est-ce vraiment vrai ?). Donc une sorte de "stochastic" VEM où l'on ne met à jour qu'une partie des $\tau$ à chaque itération. Et échantillonnage stratifié selon l'arbre ?
    - ⌛ Simulations avec $n_2$ croissant lancées sur Migale
    - Réimplémenté VE Bernoulli dans colSBM pour Bipartite et début d'implémentation du Stochastic VE. En fait, le problème vient des calculs matriciels $Y\times(\tau^{(1)})^{\top}$ ($n_2^2$), d'où le besoin de sous-échantillonner les noeuds de l'autre dimension à mettre à jour.
 :::
 - Clustering unipartite j'ai cassé une fonction de distance à vérifier et réparer
 - Codes pour le papier :
    - Nettoyer les scripts
    - Faire un joli README
    - ❓Faire des notebooks
 - Réussir à reproduire résultat de @abramovStructureKnowsBest
 - Maitriser graph-tool de Peixoto pour essayer d'utiliser l'arbre taxonomique sur le graphe de cooccurrence inféré par SparCC
 - Maitriser SparCC
 - 👶 (délégué à Mona) Clustering sur Doré :
    - Ajouter Chao1 et 2, colonne par colonne (site par site), et faire indice moyen et la variance.
 ### Inférence et microbes
 - ✅ En préparation d'un fichier (réu avec JBL à 10h45 le 06/02/2026).
 Possible en modifiant lbm.h et sbm.h d'obtenir un modèle utilisant les covariables de groupes (de blocs ?).
 Car besoin de changer `membership.m_step()` pour mettre à jour $\pmb\pi$ et $\pmb{\rho}$ en utilisant les $\pmb B^{\top}\pmb X$
 et en renvoyant l'ELBO adaptée.
    - 😄 Avantage s'inscrit directement dans blockmodels et permet d'avoir toutes les lois d'émissions déjà codées et compatibles !
    - 😢 Besoin de réfléchir à une bonne implémentation.
 #### Modèle avec covariables sur probas d'appartenances aux groupes
 \begin{align*}
 \pmb{\beta}_{r}& = \begin{pmatrix}
    \beta_{r,0}\\
    \vdots\\
    \beta_{r,p}
 \end{pmatrix}, & X_{j,\bullet} = \begin{pmatrix}
    1 = x_{0,j} & x_{1,j} & \dots & x_{p,j}
 \end{pmatrix}\\
 X_{j,\bullet} \pmb{\beta}_r& = \beta_{r,0} x_{0,j} + \beta_{r,1} x_{1,j} + \dots + \beta_{r,p} x_{p,j} & \approx \log(\rho_r^j) \\
 B & = \begin{pmatrix}
 \pmb{\beta}_1 \dots \pmb{\beta}_r \dots \pmb{\beta}_R
 \end{pmatrix} & X_{j,\bullet}B \approx \log(\pmb{\rho}^j) \\
 X B & \approx \log((\pmb{\rho}^j)_{j=1,\dots,n_2}) = \log(\pmb{\Rho})\\
 \end{align*}
 Et pour les probas en lignes du LBM
 \begin{align*}
 \pmb{\gamma}_{q}& = \begin{pmatrix}
    \gamma_{q,0}\\
    \vdots\\
    \gamma_{q,d}
 \end{pmatrix}, & V_{i,\bullet} = \begin{pmatrix}
    1 = v_{0,i} & v_{1,i} & \dots & v_{d,i}
 \end{pmatrix}\\
 V_{i,\bullet} \pmb{\gamma}_q & = \gamma_{q,0} v_{0,i} + \gamma_{q,1} v_{1,i} + \dots + \gamma_{q,d} v_{d,i} & \approx \log(\pi_q^i) \\
 \Gamma & = \begin{pmatrix}
 \pmb{\gamma}_1 \dots \pmb{\gamma}_q \dots \pmb{\gamma}_Q
 \end{pmatrix} & V_{i,\bullet} \Gamma \approx \log(\pmb{\pi}^i) \\
 V \Gamma & \approx \log((\pmb{\pi}^i)_{i=1,\dots,n_1}) = \log(\pmb{\Pi})
 \end{align*}
 #### Preuve sur l'identifiabilité
 Soient $B,B^{\prime}$ avec $B_{\bullet,R} = B^{\prime}_{\bullet,R} = \vec{0}_{p+1}$ et $X$ de rang plein tel que $X^{\top}X$ soit inversible.
 \begin{align*}
 &\sigma(XB) = \sigma(XB^{\prime})\\
 &\implies \exists C = \begin{pmatrix}c_1 \\ \vdots \\ c_j \\ \vdots \\ c_{n_2}\end{pmatrix} \in \mathbb{R}^{n_2}, X B = X B^{\prime} + C \pmb{1}_{R}^{\top} \\
 &\implies \exists C \in \mathbb{R}^{n_2}, (X B)_{j,r} = (X B^{\prime})_{j,r} + (C \pmb{1}_{R}^{\top})_{j,r} \\
 &\implies \exists C \in \mathbb{R}^{n_2}, \forall r\in\{1,\dots,R\}, \forall j\in\{1,\dots,n_2\}, \sum_{k=1}^{p+1} x_{j,k} \beta_{k,r} = \sum_{k=1}^{p+1} x_{j,k} \beta^{\prime}_{k,r} + c_j\\
 &\implies \exists C \in \mathbb{R}^{n_2}, \forall j\in\{1,\dots,n_2\}, \sum_{k=1}^{p+1} x_{j,k} \beta_{k,R} = \sum_{k=1}^{p+1} x_{j,k} \beta^{\prime}_{k,R} + c_j \\
 &\implies \exists C \in \mathbb{R}^{n_2}, \forall j\in\{1,\dots,n_2\}, \sum_{k=1}^{p+1} x_{j,k} \times 0 = \sum_{k=1}^{p+1} x_{j,k} \times 0 + c_j \\
 &\implies \exists C \in \mathbb{R}^{n_2}, \forall j\in\{1,\dots,n_2\}, 0 = 0 + c_j \implies c_j = 0 \\
 &\implies C = \begin{pmatrix} 0 \\ \vdots \\ 0 \end{pmatrix} \text{ et donc } XB = XB^{\prime} \\
 & \implies (X^{\top} X)^{-1}X^{\top} X B = (X^{\top} X)^{-1}X^{\top} X B^{\prime} \implies B=B^{\prime}
 \end{align*}
 #### Description du modèle hiérarchique
 Toujours modèle LBM mais avec probas d'appartenance pour les colonnes variables:
 \begin{align*}
 Z_i &\sim \mathcal{M}(1; \pi_1, \dots, \pi_Q), \sum_{q=1}^{Q} \pi_q = 1\\
 W_j &\sim \mathcal{M}(1; \rho_1^j, \dots, \rho_R^j), \sum_{r=1}^{R} \rho_r^j = 1\\
 Y_{i,j}&\mid Z_i = q, W_j = r \sim \mathcal{F}(\alpha_{qr})
 \end{align*}
 Inférence variationnelle donc $\ell(Y;\pmb{\theta}) \geq \mathcal{J}(\mathcal{R},\pmb{\theta})$ avec
 $$
 \ELBORTheta = \sum_{i = 1}^{n_1}\sum_{j=1}^{n_2}\sum_{q \in \mathcal{Q}_1} \sum_{r \in \mathcal{Q}_2} \tau_{iq}^{1} \tau_{jr}^{2} \log f(Y_{ij}; \alpha_{qr})
    + \sum_{i=1}^{n_1} \sum_{q \in \mathcal{Q}_1} \tau_{iq}^{1} \log \pi_{\color{black}q} + \sum_{j=1}^{n_2} \sum_{r \in \mathcal{Q}_2} \tau_{jr}^{2} \log \rho_{\color{black}r}^{j}       \\
    - \sum_{i=1}^{n_1} \sum_{q \in \mathcal{Q}_1} \tau_{iq}^{1} \log \tau_{iq}^{1} - \sum_{j=1}^{n_2} \sum_{r \in \mathcal{Q}_2} \tau_{jr}^{2} \log \tau_{jr}^{2}
 $$
 ##### Modèle Sophie
 Avec $\rho_r^j = \frac{\exp{\beta_r X_j}}{\sum_{s=1}^{R} \exp{\beta_s X_j}} = \sigma(\pmb{\beta} \pmb{X})_{r,j}$, où $\sigma$ désigne le softmax. Mais il y a besoin de poser une contrainte sur l'un des $(\beta_r)_{r=1,\dots,R}$, ici $\beta_R = 0$.
 La partie pertinente de l'ELBO devient:
 $$
  P((\beta_r)_{r=1,\dots,R}, (X_j)_{j=1,\dots,n_2}, (\tau_{jr})_{\substack{j=1,\dots,n_2\\r=1,\dots,R}} ) =  \sum_{j=1}^{n_2} \sum_{r=1}^{R} [\tau_{jr} (\beta_r X_j - \log (\sum_{s=1}^{R} \exp{\beta_s X_j}))]
 $${#eq-modele-covar-prop}
 Et on obtient la dérivée partielle par rapport à $\beta_t$ comme:
 \begin{align*}
 \dfrac{\partial P}{\partial \beta_t}&((\beta_r)_{r=1,\dots,R}, (X_j)_{j=1,\dots,n_2}, (\tau_{jr})_{\substack{j=1,\dots,n_2\\r=1,\dots,R}} ) = \sum_{j=1}^{n_2} \biggl[ \tau_{jt} X_j - \frac{X_j \exp{\beta_t X_j}}{\sum_{s=1}^{R} \exp{\beta_s X_j}} \biggr]\\
 & = \sum_{j=1}^{n_2} \biggl[\bigl(\tau_{jt} - \sigma(\pmb{\beta} \pmb{X})_{t,j}\bigr) X_j\biggr] = \sum_{j=1}^{n_2} \biggl[\bigl(\tau_{jt} - \rho_t^j \bigr) X_j\biggr] 
 \end{align*}
 #### Bibliographie: à lire, à faire
 - Lire article multi-niveaux Saint-Clair
 - 🆕 🔎 Trouver des papiers: 
    - LBM Negative Binomial
    - Network inference through sample comparison
 - Idée des groupes sur la base de distance phylogénétique:
    - En train de comprendre les distances que phyloseq permet de calculer sur notre exemple
    - En train de lire sur Principal coordinate analysis : https://openplantpathology.github.io/OPP_Workshop_Multivariate/2-MV_PCO.html
    - Parametric t-SNE pour avoir une unique représentation latente (inconvénient utilise du Deep Learning)
    - Lire Papier UniFrac
 #### Réflexion
 - easy16s : se renseigner sur 
    - $\alpha$, $\beta$ diversité
    - Heatmap
 - Regarder **SPARTA** Rennes
 - Ecrire et étudier les modèles pour différents niveaux taxonomiques.
 - 🆕 Regarder NetComi
 - 🆕 Regarder OneNet car agrégation plus robuste
 - 🆕 Réfléchir au sens d'agréger les données ou de les diviser
 #### Écrire et faire tourner
 - Lancer *colBiSBM* sur $OTU\times Sample$ &rarr; problème du chargement en mémoire des données à voir
 - Lancer *colSBM* sur $OTU\times OTU$
 - TabNet pratiquer les [exercices](https://github.com/cregouby/Tutoriel_torch)
 - 🆕 SparCC à différents niveaux
 - 🆕 SBM à différents niveaux
 - 🆕⌛ Tree-PLN à différents niveaux
 #### Causalité
 Plus sur le temps long, à regarder
 - GT causalité
 - Daria Bystrova lire présentation @bystrovaCausalDiscovery (Meek rules, V-structure)
 ## A discuter
 - 🆕 Voir pour des Réseaux / GDR où aller
 - 🆕 Chercher des cours à suivre
 ## Biblio à faire
 - Regarder Transport optimal graphes bipartite.
 ## Lectures en cours 📚
 ### HDR Vincent Brault
 - ⌛ Chap 2 : Creuser l'idée de maximiser l'énergie libre, très intéressant regarder le critère CARI et lire Robert et al 2021. Actuellement p32 du manuscrit
 - Chap 3
 ### OT
 - ⌛ @mazeletUnsupervisedLearningOptimal Intéressant pour le transport optimal entre graphes de tailles différentes | Regarder si la régularisation entropique ne marche pas bien pour le graphe.
 - ⌛ @nennaLecture2Entropic Pour comprendre le problème d'OT régularisé par l'entropie.
 - ⌛ @nennaLecture1Monge
 ### Inférence de graphes
 - ⌛ @aitchisonStatisticalAnalysisCompositional1982a, en cours
 - ❗📖 @payneFiniteMixturesMultivariate2023 sur MixMPLN
 ### Causalité
 - ❗📖 @bystrovaCausalDiscovery
 ### Largest Gaps
 - ❗📖 @braultFastConsistentAlgorithm2023
 - ❗📖 @channarondClassificationEstimationStochastic2012 le papier qui introduit le *Largest Gaps*
--- a/suivi/2026-7/references.bib
+++ b/suivi/2026-7/references.bib
@ -0,0 +1,176 @@
@article{mazeletUnsupervisedLearningOptimal,
  title      = {Unsupervised {{Learning}} for {{Optimal Transport}} Plan Prediction between Unbalanced Graphs},
  author     = {Mazelet, Sonia and Flamary, Rémi and Thirion, Bertrand},
  abstract   = {Optimal transport between graphs, based on Gromov-Wasserstein and other extensions, is a powerful tool for comparing and aligning graph structures. However, solving the associated non-convex optimization problems is computationally expensive, which limits the scalability of these methods to large graphs. In this work, we present Unbalanced Learning of Optimal Transport (ULOT), a deep learning method that predicts optimal transport plans between two graphs. Our method is trained by minimizing the fused unbalanced Gromov-Wasserstein (FUGW) loss. We propose a novel neural architecture with cross-attention that is conditioned on the FUGW tradeoff hyperparameters. We evaluate ULOT on synthetic stochastic block model (SBM) graphs and on real cortical surface data obtained from fMRI. ULOT predicts transport plans with competitive loss up to two orders of magnitude faster than classical solvers. Furthermore, the predicted plan can be used as a warm start for classical solvers to accelerate their convergence. Finally, the predicted transport plan is fully differentiable with respect to the graph inputs and FUGW hyperparameters, enabling the optimization of functionals of the ULOT plan.},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-11T09:08:09.864Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/HPZEYMM9/Mazelet et al. - Unsupervised Learning for Optimal Transport plan prediction between unbalanced graphs.pdf}
 }
@article{peixotoHierarchicalBlockStructures2014,
  title      = {Hierarchical {{Block Structures}} and {{High-Resolution Model Selection}} in {{Large Networks}}},
  author     = {Peixoto, Tiago P.},
  year       = 2014,
  month      = mar,
  journal    = {Physical Review X},
  volume     = {4},
  number     = {1},
  pages      = {011047},
  issn       = {2160-3308},
  doi        = {10.1103/PhysRevX.4.011047},
  urldate    = {2025-09-26},
  copyright  = {http://creativecommons.org/licenses/by/3.0/},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-09-26T08:27:38.586Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/R58ZQK8J/Peixoto - 2014 - Hierarchical Block Structures and High-Resolution Model Selection in Large Networks.pdf}
 }
@article{abramovStructureKnowsBest,
  title      = {Structure Knows Best: Predicting Ecological Interactions across Space through Pairwise Integration of Latent Network Patterns},
  author     = {Abramov, Kesem and Biton, Barry and Galai, Geut and Puzis, Rami and Pilosof, Shai},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-12-01T08:50:29.812Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/DX44VXQA/Abramov et al. - Structure knows best predicting ecological interactions across space through pairwise integration o.pdf}
 }
@article{nennaLecture2Entropic,
  title      = {Lecture 2: {{Entropic Optimal Transport}}},
  author     = {Nenna, Luca},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-11T16:06:28.547Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/WGFIISDB/Nenna - Lecture 2 Entropic Optimal Transport.pdf}
 }
@article{nennaLecture1Monge,
  title      = {Lecture 1 {{Monge}} and {{Kantorovich}} Problems: From Primal to Dual},
  author     = {Nenna, Luca},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-13T09:24:13.832Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/7LVQPD6D/Nenna - Lecture 1 Monge and Kantorovich problems from primal to dual.pdf}
 }
@article{Morton2021.11.09.467939,
  title        = {Scalable Estimation of Microbial Co-Occurrence Networks with {{Variational Autoencoders}}},
  author       = {Morton, James T. and Silverman, Justin and Tikhonov, Gleb and Lähdesmäki, Harri and Bonneau, Rich},
  date         = {2021},
  journaltitle = {bioRxiv : the preprint server for biology},
  shortjournal = {bioRxiv},
  eprint       = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939.full.pdf},
  publisher    = {Cold Spring Harbor Laboratory},
  doi          = {10.1101/2021.11.09.467939},
  url          = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939},
  abstract     = {Estimating microbe-microbe interactions is critical for understanding the ecological laws governing microbial communities. Rapidly decreasing sequencing costs have promised new opportunities to estimate microbe-microbe interactions across thousands of uncultured, unknown microbes. However, typical microbiome datasets are very high dimensional and accurate estimation of microbial correlations requires tens of thousands of samples, exceeding the computational capabilities of existing methodologies. Furthermore, the vast majority of microbiome studies collect compositional metagenomics data which enforces a negative bias when computing microbe-microbe correlations. The Multinomial Logistic Normal (MLN) distribution has been shown to be effective at inferring microbe-microbe correlations, however scalable Bayesian inference of these distributions has remained elusive. Here, we show that carefully constructed Variational Autoencoders (VAEs) augmented with the Isometric Log-ratio (ILR) transform can estimate low-rank MLN distributions thousands of times faster than existing methods. These VAEs can be trained on tens of thousands of samples, enabling co-occurrence inference across tens of thousands of microbes without regularization. The latent embedding distances computed from these VAEs are competitive with existing beta-diversity methods across a variety of mouse and human microbiome classification and regression tasks, with notable improvements on longitudinal studies. Competing Interest Statement: The authors have declared no competing interest.},
  elocation-id = {2021.11.09.467939},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-06-30T14:17:29.518Z}
 }
@article{aitchisonStatisticalAnalysisCompositional1982a,
  title        = {The {{Statistical Analysis}} of {{Compositional Data}}},
  author       = {Aitchison, J.},
  date         = {1982},
  journaltitle = {Journal of the Royal Statistical Society. Series B (Methodological)},
  volume       = {44},
  number       = {2},
  eprint       = {2345821},
  eprinttype   = {jstor},
  pages        = {139--177},
  publisher    = {[Royal Statistical Society, Oxford University Press]},
  issn         = {0035-9246},
  url          = {https://www.jstor.org/stable/2345821},
  urldate      = {2025-05-07},
  abstract     = {The simplex plays an important role as sample space in many practical situations where compositional data, in the form of proportions of some whole, require interpretation. It is argued that the statistical analysis of such data has proved difficult because of a lack both of concepts of independence and of rich enough parametric classes of distributions in the simplex. A variety of independence hypotheses are introduced and interrelated, and new classes of transformed-normal distributions in the simplex are provided as models within which the independence hypotheses can be tested through standard theory of parametric hypothesis testing. The new concepts and statistical methodology are illustrated by a number of applications.},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-05-07T07:43:38.485Z},
  file         = {/home/louis/snap/zotero-snap/common/Zotero/storage/S97URH4Y/Aitchison - 1982 - The Statistical Analysis of Compositional Data.pdf}
 }
@online{payneFiniteMixturesMultivariate2023,
  title       = {Finite {{Mixtures}} of {{Multivariate Poisson-Log Normal Factor Analyzers}} for {{Clustering Count Data}}},
  author      = {Payne, Andrea and Silva, Anjali and Rothstein, Steven J. and McNicholas, Paul D. and Subedi, Sanjeena},
  date        = {2023-11-13},
  eprint      = {2311.07762},
  eprinttype  = {arXiv},
  eprintclass = {stat},
  doi         = {10.48550/arXiv.2311.07762},
  url         = {http://arxiv.org/abs/2311.07762},
  urldate     = {2025-07-02},
  abstract    = {A mixture of multivariate Poisson-log normal factor analyzers is introduced by imposing constraints on the covariance matrix, which resulted in flexible models for clustering purposes. In particular, a class of eight parsimonious mixture models based on the mixtures of factor analyzers model are introduced. Variational Gaussian approximation is used for parameter estimation, and information criteria are used for model selection. The proposed models are explored in the context of clustering discrete data arising from RNA sequencing studies. Using real and simulated data, the models are shown to give favourable clustering performance. The GitHub R package for this work is available at https://github.com/anjalisilva/mixMPLNFA and is released under the open-source MIT license.},
  pubstate    = {prepublished},
  keywords    = {/unread,Statistics - Computation,Statistics - Machine Learning,Statistics - Methodology},
  annotation  = {Read\_Status: New\\
                 Read\_Status\_Date: 2025-07-02T09:31:47.579Z},
  file        = {/home/louis/snap/zotero-snap/common/Zotero/storage/BXVPEIDD/Payne et al. - 2023 - Finite Mixtures of Multivariate Poisson-Log Normal Factor Analyzers for Clustering Count Data.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/L5DAS5C2/2311.html}
 }
@unpublished{bystrovaCausalDiscovery,
  title      = {Causal Discovery},
  author     = {Bystrova, Daria},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-07-02T09:34:39.476Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/NQE5DY92/Bystrova - Causal discovery.pdf}
 }
@online{braultFastConsistentAlgorithm2023,
  title       = {Fast and {{Consistent Algorithm}} for the {{Latent Block Model}}},
  author      = {Brault, Vincent and Channarond, Antoine},
  date        = {2023-03-09},
  eprint      = {1610.09005},
  eprinttype  = {arXiv},
  eprintclass = {math},
  doi         = {10.48550/arXiv.1610.09005},
  url         = {http://arxiv.org/abs/1610.09005},
  urldate     = {2025-07-09},
  abstract    = {The latent block model is used to simultaneously rank the rows and columns of a matrix to reveal a block structure. The algorithms used for estimation are often time consuming. However, recent work shows that the log-likelihood ratios are equivalent under the complete and observed (with unknown labels) models and the groups posterior distribution to converge as the size of the data increases to a Dirac mass located at the actual groups configuration. Based on these observations, the algorithm Largest Gaps is proposed in this paper to perform clustering using only the marginals of the matrix, when the number of blocks is very small with respect to the size of the whole matrix in the case of binary data. In addition, a model selection method is incorporated with a proof of its consistency. Thus, this paper shows that studying simplistic configurations (few blocks compared to the size of the matrix or very contrasting blocks) with complex algorithms is useless since the marginals already give very good parameter and classification estimates.},
  langid      = {english},
  pubstate    = {prepublished},
  keywords    = {/unread,Mathematics - Statistics Theory,Statistics - Computation,Statistics - Statistics Theory},
  annotation  = {Read\_Status: New\\
                 Read\_Status\_Date: 2025-07-09T13:58:53.533Z},
  file        = {/home/louis/snap/zotero-snap/common/Zotero/storage/5LXC6Y68/Brault et Channarond - 2023 - Fast and Consistent Algorithm for the Latent Block Model.pdf}
 }
@article{braultGeneralisationLalgorithmeLargest,
  title      = {Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique},
  author     = {Brault, Vincent and Channarond, Antoine and Robert, Valérie},
  abstract   = {The latent block model assumes there exists a distribution for each crossing between an object cluster and a variable cluster of a data table ; the cells are supposed to be independent conditionally to the choice of these clusters. To estimate the model parameters, most of algorithms are time consuming. Brault and Channarond (2016) proposed to adapt the Largest Gaps algorithm which consists in using the margins. They thus obtained a procedure which estimates all the model parameters consistently but requires a large number of observations. In this talk, we will extend the procedure to the case of any distribution having a second order moment by using an EM algorithm estimation.},
  langid     = {french},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-07-09T12:29:43.098Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/GIUNC4L3/Brault et al. - Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique.pdf}
 }
@article{channarondClassificationEstimationStochastic2012,
  title        = {Classification and Estimation in the {{Stochastic Blockmodel}} Based on the Empirical Degrees},
  author       = {Channarond, Antoine and Daudin, Jean-Jacques and Robin, Stéphane},
  date         = {2012-01-01},
  journaltitle = {Electronic Journal of Statistics},
  shortjournal = {Electron. J. Statist.},
  volume       = {6},
  publisher    = {Institute of Mathematical Statistics},
  issn         = {1935-7524},
  doi          = {10.1214/12-ejs753},
  url          = {https://projecteuclid.org/journals/electronic-journal-of-statistics/volume-6/issue-none/Classification-and-estimation-in-the-Stochastic-Blockmodel-based-on-the/10.1214/12-EJS753.full},
  urldate      = {2025-07-09},
  abstract     = {The Stochastic Blockmodel [16] is a mixture model for heterogeneous network data. Unlike the usual statistical framework, new nodes give additional information about the previous ones in this model. Thereby the distribution of the degrees concentrates in points conditionally on the node class. We show under a mild assumption that classification, estimation and model selection can actually be achieved with no more than the empirical degree data. We provide an algorithm able to process very large networks and consistent estimators based on it. In particular, we prove a bound of the probability of misclassification of at least one node, including when the number of classes grows.},
  issue        = {none},
  langid       = {english},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-07-09T13:59:33.921Z},
  file         = {/home/louis/snap/zotero-snap/common/Zotero/storage/8TL8AJ2G/Channarond et al. - 2012 - Classification and estimation in the Stochastic Blockmodel based on the empirical degrees.pdf}
 }
--- a/suivi/2026-8/2026-8.qmd
+++ b/suivi/2026-8/2026-8.qmd
@ -0,0 +1,130 @@
 ---
 title: "Bilan semaine 8 2026 : 16 février - 20 février"
 categories: [colBiSBM, inférence, GNN]
 date: 2026-02-20
 date-modified: last-modified
 bibliography: references.bib
 # from: markdown+latex_macros
 ---
 {{< include /_macros.tex >}}
 ## TODO List
 - Petites opérations sur les OTUs (regarder la matrice dans les yeux):
    - Ranger les OTUs par variances (i.e. `sd(OTU_j)`)
        - HMC sur-dispersés (au-dessus bissectrice)
        - Enterotype phyloseq sous-disp
    - Regarder la proportion de 1. taxons rares, 2. zéros.
    - Faire des coupures selon niveaux taxonomiques et regarder si $\Var_{\text{intra}} \approx \Var_{\text{inter}}$
    - *Bonus*: faire ça dans qmd et voir si forge permet gitlab pages
 - ✅ Avec blockmodels, codé un LBM-Séquentiel. *Des différences contrastées...*
    - TODO Ajouter lien vers notebooks résultats
 - Relire @peixotoHierarchicalBlockStructures2014
    - Regarder les gens qui citent les travaux de Peixoto
 - ⌛ **En cours** Implémentation `blockmodels` LBM avec covariables sur proportions (voir @eq-modele-covar-prop)
 :::{.callout-note title="Idées"}
 - Travailler sur Fungus Tree network
 - ✅ **Demander à PB et SD**, ils regardent : Comparaison covar prop avec GREMLINS multipartite sur (log(dist_phylo), fungus-tree)
 - Trouver manière de faire un compromis : $\ell(Y,Z,W;\theta) - \lambda d(C(W),C_0)$ avec $C(W)$ le clustering seulement sur la base de la structure LBM et $C_0$ le clustering de l'arbre. Problème $d$ est une distance entre partition, comment optimiser dessus ?
 - ⌛ Mise à jour partielle des $\tau$ : ce qui pose souci, ce sont les gros calculs matriciels (est-ce vraiment vrai ?). Donc une sorte de "stochastic" VEM où l'on ne met à jour qu'une partie des $\tau$ à chaque itération. Et échantillonnage stratifié selon l'arbre ?
    - ⌛ Simulations avec $n_2$ croissant lancées sur Migale
    - Réimplémenté VE Bernoulli dans colSBM pour Bipartite et début d'implémentation du Stochastic VE. En fait, le problème vient des calculs matriciels $Y\times(\tau^{(1)})^{\top}$ ($n_2^2$), d'où le besoin de sous-échantillonner les noeuds de l'autre dimension à mettre à jour.
 :::
 - Clustering unipartite j'ai cassé une fonction de distance à vérifier et réparer
 - Codes pour le papier :
    - Nettoyer les scripts
    - Faire un joli README
    - ❓Faire des notebooks
 - Réussir à reproduire résultat de @abramovStructureKnowsBest
 - Maitriser graph-tool de Peixoto pour essayer d'utiliser l'arbre taxonomique sur le graphe de cooccurrence inféré par SparCC
 - Maitriser SparCC
 - 👶 (délégué à Mona) Clustering sur Doré :
    - Ajouter Chao1 et 2, colonne par colonne (site par site), et faire indice moyen et la variance.
 ### Inférence et microbes
 - ✅ En préparation d'un fichier (réu avec JBL à 10h45 le 06/02/2026).
 Possible en modifiant lbm.h et sbm.h d'obtenir un modèle utilisant les covariables de groupes (de blocs ?).
 Car besoin de changer `membership.m_step()` pour mettre à jour $\pmb\pi$ et $\pmb{\rho}$ en utilisant les $\pmb B^{\top}\pmb X$
 et en renvoyant l'ELBO adaptée.
    - 😄 Avantage s'inscrit directement dans blockmodels et permet d'avoir toutes les lois d'émissions déjà codées et compatibles !
    - 😢 Besoin de réfléchir à une bonne implémentation.
 #### Bibliographie: à lire, à faire
 - Lire article multi-niveaux Saint-Clair
 - 🆕 🔎 Trouver des papiers: 
    - LBM Negative Binomial
    - Network inference through sample comparison
 - Idée des groupes sur la base de distance phylogénétique:
    - En train de comprendre les distances que phyloseq permet de calculer sur notre exemple
    - En train de lire sur Principal coordinate analysis : https://openplantpathology.github.io/OPP_Workshop_Multivariate/2-MV_PCO.html
    - Parametric t-SNE pour avoir une unique représentation latente (inconvénient utilise du Deep Learning)
    - Lire Papier UniFrac
 #### Écrire et faire tourner
 - 🆕 SparCC à différents niveaux
 - 🆕⌛ Tree-PLN à différents niveaux
 #### Causalité
 Plus sur le temps long, à regarder
 - GT causalité
 - Daria Bystrova lire présentation @bystrovaCausalDiscovery (Meek rules, V-structure)
 ## A discuter
 - 🆕 Voir pour des Réseaux / GDR où aller
 - 🆕 Chercher des cours à suivre
 ## Biblio à faire
 - Regarder Transport optimal graphes bipartite.
 ## Lectures en cours 📚
 ### HDR Vincent Brault
 - ⌛ Chap 2 : Creuser l'idée de maximiser l'énergie libre, très intéressant regarder le critère CARI et lire Robert et al 2021. Actuellement p32 du manuscrit
 - Chap 3
 ### OT
 - ⌛ @mazeletUnsupervisedLearningOptimal Intéressant pour le transport optimal entre graphes de tailles différentes | Regarder si la régularisation entropique ne marche pas bien pour le graphe.
 - ⌛ @nennaLecture2Entropic Pour comprendre le problème d'OT régularisé par l'entropie.
 - ⌛ @nennaLecture1Monge
 ### Inférence de graphes
 - ⌛ @aitchisonStatisticalAnalysisCompositional1982a, en cours
 - ❗📖 @payneFiniteMixturesMultivariate2023 sur MixMPLN
 ### Causalité
 - ❗📖 @bystrovaCausalDiscovery
 ### Largest Gaps
 - ❗📖 @braultFastConsistentAlgorithm2023
 - ❗📖 @channarondClassificationEstimationStochastic2012 le papier qui introduit le *Largest Gaps*
--- a/suivi/2026-8/references.bib
+++ b/suivi/2026-8/references.bib
@ -0,0 +1,176 @@
@article{mazeletUnsupervisedLearningOptimal,
  title      = {Unsupervised {{Learning}} for {{Optimal Transport}} Plan Prediction between Unbalanced Graphs},
  author     = {Mazelet, Sonia and Flamary, Rémi and Thirion, Bertrand},
  abstract   = {Optimal transport between graphs, based on Gromov-Wasserstein and other extensions, is a powerful tool for comparing and aligning graph structures. However, solving the associated non-convex optimization problems is computationally expensive, which limits the scalability of these methods to large graphs. In this work, we present Unbalanced Learning of Optimal Transport (ULOT), a deep learning method that predicts optimal transport plans between two graphs. Our method is trained by minimizing the fused unbalanced Gromov-Wasserstein (FUGW) loss. We propose a novel neural architecture with cross-attention that is conditioned on the FUGW tradeoff hyperparameters. We evaluate ULOT on synthetic stochastic block model (SBM) graphs and on real cortical surface data obtained from fMRI. ULOT predicts transport plans with competitive loss up to two orders of magnitude faster than classical solvers. Furthermore, the predicted plan can be used as a warm start for classical solvers to accelerate their convergence. Finally, the predicted transport plan is fully differentiable with respect to the graph inputs and FUGW hyperparameters, enabling the optimization of functionals of the ULOT plan.},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-11T09:08:09.864Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/HPZEYMM9/Mazelet et al. - Unsupervised Learning for Optimal Transport plan prediction between unbalanced graphs.pdf}
 }
@article{peixotoHierarchicalBlockStructures2014,
  title      = {Hierarchical {{Block Structures}} and {{High-Resolution Model Selection}} in {{Large Networks}}},
  author     = {Peixoto, Tiago P.},
  year       = 2014,
  month      = mar,
  journal    = {Physical Review X},
  volume     = {4},
  number     = {1},
  pages      = {011047},
  issn       = {2160-3308},
  doi        = {10.1103/PhysRevX.4.011047},
  urldate    = {2025-09-26},
  copyright  = {http://creativecommons.org/licenses/by/3.0/},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-09-26T08:27:38.586Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/R58ZQK8J/Peixoto - 2014 - Hierarchical Block Structures and High-Resolution Model Selection in Large Networks.pdf}
 }
@article{abramovStructureKnowsBest,
  title      = {Structure Knows Best: Predicting Ecological Interactions across Space through Pairwise Integration of Latent Network Patterns},
  author     = {Abramov, Kesem and Biton, Barry and Galai, Geut and Puzis, Rami and Pilosof, Shai},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-12-01T08:50:29.812Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/DX44VXQA/Abramov et al. - Structure knows best predicting ecological interactions across space through pairwise integration o.pdf}
 }
@article{nennaLecture2Entropic,
  title      = {Lecture 2: {{Entropic Optimal Transport}}},
  author     = {Nenna, Luca},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-11T16:06:28.547Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/WGFIISDB/Nenna - Lecture 2 Entropic Optimal Transport.pdf}
 }
@article{nennaLecture1Monge,
  title      = {Lecture 1 {{Monge}} and {{Kantorovich}} Problems: From Primal to Dual},
  author     = {Nenna, Luca},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-13T09:24:13.832Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/7LVQPD6D/Nenna - Lecture 1 Monge and Kantorovich problems from primal to dual.pdf}
 }
@article{Morton2021.11.09.467939,
  title        = {Scalable Estimation of Microbial Co-Occurrence Networks with {{Variational Autoencoders}}},
  author       = {Morton, James T. and Silverman, Justin and Tikhonov, Gleb and Lähdesmäki, Harri and Bonneau, Rich},
  date         = {2021},
  journaltitle = {bioRxiv : the preprint server for biology},
  shortjournal = {bioRxiv},
  eprint       = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939.full.pdf},
  publisher    = {Cold Spring Harbor Laboratory},
  doi          = {10.1101/2021.11.09.467939},
  url          = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939},
  abstract     = {Estimating microbe-microbe interactions is critical for understanding the ecological laws governing microbial communities. Rapidly decreasing sequencing costs have promised new opportunities to estimate microbe-microbe interactions across thousands of uncultured, unknown microbes. However, typical microbiome datasets are very high dimensional and accurate estimation of microbial correlations requires tens of thousands of samples, exceeding the computational capabilities of existing methodologies. Furthermore, the vast majority of microbiome studies collect compositional metagenomics data which enforces a negative bias when computing microbe-microbe correlations. The Multinomial Logistic Normal (MLN) distribution has been shown to be effective at inferring microbe-microbe correlations, however scalable Bayesian inference of these distributions has remained elusive. Here, we show that carefully constructed Variational Autoencoders (VAEs) augmented with the Isometric Log-ratio (ILR) transform can estimate low-rank MLN distributions thousands of times faster than existing methods. These VAEs can be trained on tens of thousands of samples, enabling co-occurrence inference across tens of thousands of microbes without regularization. The latent embedding distances computed from these VAEs are competitive with existing beta-diversity methods across a variety of mouse and human microbiome classification and regression tasks, with notable improvements on longitudinal studies.Competing Interest StatementThe authors have declared no competing interest.},
  elocation-id = {2021.11.09.467939},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-06-30T14:17:29.518Z}
 }
@article{aitchisonStatisticalAnalysisCompositional1982a,
  title        = {The {{Statistical Analysis}} of {{Compositional Data}}},
  author       = {Aitchison, J.},
  date         = {1982},
  journaltitle = {Journal of the Royal Statistical Society. Series B (Methodological)},
  volume       = {44},
  number       = {2},
  eprint       = {2345821},
  eprinttype   = {jstor},
  pages        = {139--177},
  publisher    = {[Royal Statistical Society, Oxford University Press]},
  issn         = {0035-9246},
  url          = {https://www.jstor.org/stable/2345821},
  urldate      = {2025-05-07},
  abstract     = {The simplex plays an important role as sample space in many practical situations where compositional data, in the form of proportions of some whole, require interpretation. It is argued that the statistical analysis of such data has proved difficult because of a lack both of concepts of independence and of rich enough parametric classes of distributions in the simplex. A variety of independence hypotheses are introduced and interrelated, and new classes of transformed-normal distributions in the simplex are provided as models within which the independence hypotheses can be tested through standard theory of parametric hypothesis testing. The new concepts and statistical methodology are illustrated by a number of applications.},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-05-07T07:43:38.485Z},
  file         = {/home/louis/snap/zotero-snap/common/Zotero/storage/S97URH4Y/Aitchison - 1982 - The Statistical Analysis of Compositional Data.pdf}
 }
@online{payneFiniteMixturesMultivariate2023,
  title       = {Finite {{Mixtures}} of {{Multivariate Poisson-Log Normal Factor Analyzers}} for {{Clustering Count Data}}},
  author      = {Payne, Andrea and Silva, Anjali and Rothstein, Steven J. and McNicholas, Paul D. and Subedi, Sanjeena},
  date        = {2023-11-13},
  eprint      = {2311.07762},
  eprinttype  = {arXiv},
  eprintclass = {stat},
  doi         = {10.48550/arXiv.2311.07762},
  url         = {http://arxiv.org/abs/2311.07762},
  urldate     = {2025-07-02},
  abstract    = {A mixture of multivariate Poisson-log normal factor analyzers is introduced by imposing constraints on the covariance matrix, which resulted in flexible models for clustering purposes. In particular, a class of eight parsimonious mixture models based on the mixtures of factor analyzers model are introduced. Variational Gaussian approximation is used for parameter estimation, and information criteria are used for model selection. The proposed models are explored in the context of clustering discrete data arising from RNA sequencing studies. Using real and simulated data, the models are shown to give favourable clustering performance. The GitHub R package for this work is available at https://github.com/anjalisilva/mixMPLNFA and is released under the open-source MIT license.},
  pubstate    = {prepublished},
  keywords    = {/unread,Statistics - Computation,Statistics - Machine Learning,Statistics - Methodology},
  annotation  = {Read\_Status: New\\
                 Read\_Status\_Date: 2025-07-02T09:31:47.579Z},
  file        = {/home/louis/snap/zotero-snap/common/Zotero/storage/BXVPEIDD/Payne et al. - 2023 - Finite Mixtures of Multivariate Poisson-Log Normal Factor Analyzers for Clustering Count Data.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/L5DAS5C2/2311.html}
 }
@unpublished{bystrovaCausalDiscovery,
  title      = {Causal Discovery},
  author     = {Bystrova, Daria},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-07-02T09:34:39.476Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/NQE5DY92/Bystrova - Causal discovery.pdf}
 }
@online{braultFastConsistentAlgorithm2023,
  title       = {Fast and {{Consistent Algorithm}} for the {{Latent Block Model}}},
  author      = {Brault, Vincent and Channarond, Antoine},
  date        = {2023-03-09},
  eprint      = {1610.09005},
  eprinttype  = {arXiv},
  eprintclass = {math},
  doi         = {10.48550/arXiv.1610.09005},
  url         = {http://arxiv.org/abs/1610.09005},
  urldate     = {2025-07-09},
  abstract    = {The latent block model is used to simultaneously rank the rows and columns of a matrix to reveal a block structure. The algorithms used for estimation are often time consuming. However, recent work shows that the log-likelihood ratios are equivalent under the complete and observed (with unknown labels) models and the groups posterior distribution to converge as the size of the data increases to a Dirac mass located at the actual groups configuration. Based on these observations, the algorithm Largest Gaps is proposed in this paper to perform clustering using only the marginals of the matrix, when the number of blocks is very small with respect to the size of the whole matrix in the case of binary data. In addition, a model selection method is incorporated with a proof of its consistency. Thus, this paper shows that studying simplistic configurations (few blocks compared to the size of the matrix or very contrasting blocks) with complex algorithms is useless since the marginals already give very good parameter and classification estimates.},
  langid      = {english},
  pubstate    = {prepublished},
  keywords    = {/unread,Mathematics - Statistics Theory,Statistics - Computation,Statistics - Statistics Theory},
  annotation  = {Read\_Status: New\\
                 Read\_Status\_Date: 2025-07-09T13:58:53.533Z},
  file        = {/home/louis/snap/zotero-snap/common/Zotero/storage/5LXC6Y68/Brault et Channarond - 2023 - Fast and Consistent Algorithm for the Latent Block Model.pdf}
 }
@article{braultGeneralisationLalgorithmeLargest,
  title      = {Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique},
  author     = {Brault, Vincent and Channarond, Antoine and Robert, Valérie},
  abstract   = {The latent block model assumes there exists a distribution for each crossing between an object cluster and a variable cluster of a data table ; the cells are supposed to be independent conditionally to the choice of these clusters. To estimate the model parameters, most of algorithms are time consuming. Brault and Channarond (2016) proposed to adapt the Largest Gaps algorithm which consists in using the margins. They thus obtained a procedure which estimates all the model parameters consistently but requires a large number of observations. In this talk, we will extend the procedure to the case of any distribution having a second order moment by using an EM algorithm estimation.},
  langid     = {french},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-07-09T12:29:43.098Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/GIUNC4L3/Brault et al. - Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique.pdf}
 }
@article{channarondClassificationEstimationStochastic2012,
  title        = {Classification and Estimation in the {{Stochastic Blockmodel}} Based on the Empirical Degrees},
  author       = {Channarond, Antoine and Daudin, Jean-Jacques and Robin, Stéphane},
  date         = {2012-01-01},
  journaltitle = {Electronic Journal of Statistics},
  shortjournal = {Electron. J. Statist.},
  volume       = {6},
  publisher    = {Institute of Mathematical Statistics},
  issn         = {1935-7524},
  doi          = {10.1214/12-ejs753},
  url          = {https://projecteuclid.org/journals/electronic-journal-of-statistics/volume-6/issue-none/Classification-and-estimation-in-the-Stochastic-Blockmodel-based-on-the/10.1214/12-EJS753.full},
  urldate      = {2025-07-09},
  abstract     = {The Stochastic Blockmodel [16] is a mixture model for heterogeneous network data. Unlike the usual statistical framework, new nodes give additional information about the previous ones in this model. Thereby the distribution of the degrees concentrates in points conditionally on the node class. We show under a mild assumption that classification, estimation and model selection can actually be achieved with no more than the empirical degree data. We provide an algorithm able to process very large networks and consistent estimators based on it. In particular, we prove a bound of the probability of misclassification of at least one node, including when the number of classes grows.},
  issue        = {none},
  langid       = {english},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-07-09T13:59:33.921Z},
  file         = {/home/louis/snap/zotero-snap/common/Zotero/storage/8TL8AJ2G/Channarond et al. - 2012 - Classification and estimation in the Stochastic Blockmodel based on the empirical degrees.pdf}
 }
--- a/suivi/2026-9/2026-9.qmd
+++ b/suivi/2026-9/2026-9.qmd
@ -0,0 +1,133 @@
 ---
 title: "Bilan semaine 9 2026 : 23 février - 27 février"
 categories: [colBiSBM, inférence, GNN]
 date: 2026 02 23
 date-modified: last-modified
 bibliography: references.bib
 # from: markdown+latex_macros
 ---
 {{< include /_macros.tex >}}
 ## TODO List
 - Petites opérations sur les OTUs (regarder la matrice dans les yeux):
    - Ranger les OTUs par variances (i.e. `sd(OTU_j)`)
        - HMC sur-dispersés (au-dessus bissectrice)
        - Enterotype phyloseq sous-disp
    - Regarder la proportion de 1. taxons rares, 2. zéros.
    - Faire des coupures selon niveaux taxonomiques et regarder si $\Var_{\text{intra}} \approx \Var_{\text{inter}}$
    - *Bonus*: faire ça dans qmd et voir si forge permet gitlab pages
 - ✅ Avec blockmodels, codé un LBM-Séquentiel. *Des différences contrastées...*
    - [Lien vers l'application du LBM séquentiel sur les données de Chaillou](analysis_benchmark_lbm_seq.html)
 - Relire @peixotoHierarchicalBlockStructures2014
    - Regarder les gens qui citent les travaux de Peixoto
    - Utiliser graphtools en initialisant la recherche Nested avec le partitionnement donné par l'arbre phylogénétique.
 - ⌛ **En cours** Implémentation `blockmodels` LBM avec covariables sur proportions (voir @eq-modele-covar-prop)
 :::{.callout-note title="Idées"}
 - Trouver manière de faire un compromis : $\ell(Y,Z,W;\theta) - \lambda d(C(W),C_0)$ avec $C(W)$ le clustering seulement sur la base de la structure LBM et $C_0$ le clustering de l'arbre. Problème $d$ est une distance entre partitions, comment optimiser dessus ?
 - ⌛ Mise à jour partielle des $\tau$ : ce qui pose soucis c'est les gros calculs matriciels (c'est vraiment vrai?). Donc sorte de "stochastic" VEM où on update seulement une partie des $\tau$ à chaque itération. Et échantillonnage stratifié selon l'arbre ?
    - ⌛ Simulations avec $n_2$ croissant lancées sur Migale
    - Réimplémenté VE Bernoulli dans colSBM pour Bipartite et début implémentation Stochastic VE. En fait le problème vient des calculs matriciels $Y\times(\tau^{(1)})^{\top}$ ($n_2^2$) donc besoin de sous-échantillonner les noeuds de l'autre dimension à mettre à jour.
 :::
 - Clustering unipartite : j'ai cassé une fonction de distance, à vérifier et réparer
 - Codes pour le papier :
    - Nettoyer les scripts
    - Faire un joli README
    - ❓Faire des notebooks
 - Réussir à reproduire résultat de @abramovStructureKnowsBest
 - Maitriser graphtools de Peixoto pour essayer d'utiliser l'arbre taxonomique sur graphe de cooccurrence inféré par SparCC
 - Maitriser SparCC
 - 👶 (délégué à Mona) Clustering sur Doré :
    - Ajouter Chao1 et 2, colonne par colonne (site par site), et faire indice moyen et la variance.
 ### Inférence et microbes
 - ⌛ (En cours) Possible en modifiant lbm.h et sbm.h d'obtenir un modèle utilisant les covariables de groupes (de blocs ?).
 Car besoin de changer `membership.m_step()` pour mettre à jour $\pmb\pi$ et $\pmb{\rho}$ en utilisant les $\pmb B^{\top}\pmb X$
 et en renvoyant l'ELBO adaptée.
    - 😄 Avantage s'inscrit directement dans blockmodels et permet d'avoir toutes les lois d'émissions déjà codées et compatibles !
    - 😢 Besoin de réfléchir à une bonne implémentation.
 J'ai codé l'optimisation et les transferts mais il faut que je vérifie que tout fonctionne
 - ✅ Appliqué multipartite sur $\forall i, OTU_i \times Sample$:
 ![Le plot des groupes trouvés par le multipartite (2 pour tous les OTUs et 4 pour les échantillons.)](figs/Multipartite.svg)
 #### Bibliographie: à lire, à faire
 - Lire article multi-niveaux Saint-Clair
 - 🆕 🔎 Trouver des papiers: 
    - LBM Negative Binomial
    - Network inference through sample comparison
 - Idée des groupes sur la base de distance phylogénétique:
    - En train de comprendre les distances que phyloseq permet de calculer sur notre exemple
    - En train de lire sur Principal coordinate analysis : https://openplantpathology.github.io/OPP_Workshop_Multivariate/2-MV_PCO.html
    - Parametric t-SNE pour avoir une unique représentation latente (inconvénient utilise du Deep Learning)
    - Lire Papier UniFrac
 #### Écrire et faire tourner
 - 🆕 SparCC à différents niveaux
 - 🆕⌛ Tree-PLN à différents niveaux
 #### Causalité
 Plus sur le temps long, à regarder
 - GT causalité
 - Daria Bystrova lire présentation @bystrovaCausalDiscovery (Meek rules, V-structure)
 ## A discuter
 - 🆕 Voir pour des Réseaux / GDR où aller
 - 🆕 Chercher des cours à suivre
 ## Biblio à faire
 - Regarder Transport optimal graphes bipartites.
 ## Lectures en cours 📚
 ### HDR Vincent Brault
 - ⌛ Chap 2 : Creuser l'idée de maximiser l'énergie libre, très intéressant regarder le critère CARI et lire Robert et al 2021. Actuellement p32 du manuscrit
 - Chap 3
 ### OT
 - ⌛ @mazeletUnsupervisedLearningOptimal Intéressant pour le transport optimal entre graphes de tailles différentes | Regarder si régularisation entropique ne marche pas bien pour le graphe.
 - ⌛ @nennaLecture2Entropic Pour comprendre le problème d'OT régularisé par l'entropie.
 - ⌛ @nennaLecture1Monge
 ### Inférence de graphes
 - ⌛ @aitchisonStatisticalAnalysisCompositional1982a, en cours
 - ❗📖 @payneFiniteMixturesMultivariate2023 sur MixMPLN
 ### Causalité
 - ❗📖 @bystrovaCausalDiscovery
 ### Largest Gaps
 - ❗📖 @braultFastConsistentAlgorithm2023
 - ❗📖 @channarondClassificationEstimationStochastic2012 le papier qui introduit le *Largest Gaps*
--- a/suivi/2026-9/analysis_benchmark_lbm_seq.html
+++ b/suivi/2026-9/analysis_benchmark_lbm_seq.html
--- a/suivi/2026-9/figs/Multipartite.svg
+++ b/suivi/2026-9/figs/Multipartite.svg
--- a/suivi/2026-9/references.bib
+++ b/suivi/2026-9/references.bib
@ -0,0 +1,176 @@
@article{mazeletUnsupervisedLearningOptimal,
  title      = {Unsupervised {{Learning}} for {{Optimal Transport}} Plan Prediction between Unbalanced Graphs},
  author     = {Mazelet, Sonia and Flamary, Rémi and Thirion, Bertrand},
  abstract   = {Optimal transport between graphs, based on Gromov-Wasserstein and other extensions, is a powerful tool for comparing and aligning graph structures. However, solving the associated non-convex optimization problems is computationally expensive, which limits the scalability of these methods to large graphs. In this work, we present Unbalanced Learning of Optimal Transport (ULOT), a deep learning method that predicts optimal transport plans between two graphs. Our method is trained by minimizing the fused unbalanced Gromov-Wasserstein (FUGW) loss. We propose a novel neural architecture with cross-attention that is conditioned on the FUGW tradeoff hyperparameters. We evaluate ULOT on synthetic stochastic block model (SBM) graphs and on real cortical surface data obtained from fMRI. ULOT predicts transport plans with competitive loss up to two orders of magnitude faster than classical solvers. Furthermore, the predicted plan can be used as a warm start for classical solvers to accelerate their convergence. Finally, the predicted transport plan is fully differentiable with respect to the graph inputs and FUGW hyperparameters, enabling the optimization of functionals of the ULOT plan.},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-11T09:08:09.864Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/HPZEYMM9/Mazelet et al. - Unsupervised Learning for Optimal Transport plan prediction between unbalanced graphs.pdf}
 }
@article{peixotoHierarchicalBlockStructures2014,
  title      = {Hierarchical {{Block Structures}} and {{High-Resolution Model Selection}} in {{Large Networks}}},
  author     = {Peixoto, Tiago P.},
  year       = 2014,
  month      = mar,
  journal    = {Physical Review X},
  volume     = {4},
  number     = {1},
  pages      = {011047},
  issn       = {2160-3308},
  doi        = {10.1103/PhysRevX.4.011047},
  urldate    = {2025-09-26},
  copyright  = {http://creativecommons.org/licenses/by/3.0/},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-09-26T08:27:38.586Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/R58ZQK8J/Peixoto - 2014 - Hierarchical Block Structures and High-Resolution Model Selection in Large Networks.pdf}
 }
@article{abramovStructureKnowsBest,
  title      = {Structure Knows Best: Predicting Ecological Interactions across Space through Pairwise Integration of Latent Network Patterns},
  author     = {Abramov, Kesem and Biton, Barry and Galai, Geut and Puzis, Rami and Pilosof, Shai},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-12-01T08:50:29.812Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/DX44VXQA/Abramov et al. - Structure knows best predicting ecological interactions across space through pairwise integration o.pdf}
 }
@article{nennaLecture2Entropic,
  title      = {Lecture 2: {{Entropic Optimal Transport}}},
  author     = {Nenna, Luca},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-11T16:06:28.547Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/WGFIISDB/Nenna - Lecture 2 Entropic Optimal Transport.pdf}
 }
@article{nennaLecture1Monge,
  title      = {Lecture 1 {{Monge}} and {{Kantorovich}} Problems: From Primal to Dual},
  author     = {Nenna, Luca},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-06-13T09:24:13.832Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/7LVQPD6D/Nenna - Lecture 1 Monge and Kantorovich problems from primal to dual.pdf}
 }
@article{Morton2021.11.09.467939,
  title        = {Scalable Estimation of Microbial Co-Occurrence Networks with {{Variational Autoencoders}}},
  author       = {Morton, James T. and Silverman, Justin and Tikhonov, Gleb and Lähdesmäki, Harri and Bonneau, Rich},
  date         = {2021},
  journaltitle = {bioRxiv : the preprint server for biology},
  shortjournal = {bioRxiv},
  eprint       = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939.full.pdf},
  publisher    = {Cold Spring Harbor Laboratory},
  doi          = {10.1101/2021.11.09.467939},
  url          = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939},
  abstract     = {Estimating microbe-microbe interactions is critical for understanding the ecological laws governing microbial communities. Rapidly decreasing sequencing costs have promised new opportunities to estimate microbe-microbe interactions across thousands of uncultured, unknown microbes. However, typical microbiome datasets are very high dimensional and accurate estimation of microbial correlations requires tens of thousands of samples, exceeding the computational capabilities of existing methodologies. Furthermore, the vast majority of microbiome studies collect compositional metagenomics data which enforces a negative bias when computing microbe-microbe correlations. The Multinomial Logistic Normal (MLN) distribution has been shown to be effective at inferring microbe-microbe correlations, however scalable Bayesian inference of these distributions has remained elusive. Here, we show that carefully constructed Variational Autoencoders (VAEs) augmented with the Isometric Log-ratio (ILR) transform can estimate low-rank MLN distributions thousands of times faster than existing methods. These VAEs can be trained on tens of thousands of samples, enabling co-occurrence inference across tens of thousands of microbes without regularization. The latent embedding distances computed from these VAEs are competitive with existing beta-diversity methods across a variety of mouse and human microbiome classification and regression tasks, with notable improvements on longitudinal studies.Competing Interest StatementThe authors have declared no competing interest.},
  elocation-id = {2021.11.09.467939},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-06-30T14:17:29.518Z}
 }
@article{aitchisonStatisticalAnalysisCompositional1982a,
  title        = {The {{Statistical Analysis}} of {{Compositional Data}}},
  author       = {Aitchison, J.},
  date         = {1982},
  journaltitle = {Journal of the Royal Statistical Society. Series B (Methodological)},
  volume       = {44},
  number       = {2},
  eprint       = {2345821},
  eprinttype   = {jstor},
  pages        = {139--177},
  publisher    = {[Royal Statistical Society, Oxford University Press]},
  issn         = {0035-9246},
  url          = {https://www.jstor.org/stable/2345821},
  urldate      = {2025-05-07},
  abstract     = {The simplex plays an important role as sample space in many practical situations where compositional data, in the form of proportions of some whole, require interpretation. It is argued that the statistical analysis of such data has proved difficult because of a lack both of concepts of independence and of rich enough parametric classes of distributions in the simplex. A variety of independence hypotheses are introduced and interrelated, and new classes of transformed-normal distributions in the simplex are provided as models within which the independence hypotheses can be tested through standard theory of parametric hypothesis testing. The new concepts and statistical methodology are illustrated by a number of applications.},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-05-07T07:43:38.485Z},
  file         = {/home/louis/snap/zotero-snap/common/Zotero/storage/S97URH4Y/Aitchison - 1982 - The Statistical Analysis of Compositional Data.pdf}
 }
@online{payneFiniteMixturesMultivariate2023,
  title       = {Finite {{Mixtures}} of {{Multivariate Poisson-Log Normal Factor Analyzers}} for {{Clustering Count Data}}},
  author      = {Payne, Andrea and Silva, Anjali and Rothstein, Steven J. and McNicholas, Paul D. and Subedi, Sanjeena},
  date        = {2023-11-13},
  eprint      = {2311.07762},
  eprinttype  = {arXiv},
  eprintclass = {stat},
  doi         = {10.48550/arXiv.2311.07762},
  url         = {http://arxiv.org/abs/2311.07762},
  urldate     = {2025-07-02},
  abstract    = {A mixture of multivariate Poisson-log normal factor analyzers is introduced by imposing constraints on the covariance matrix, which resulted in flexible models for clustering purposes. In particular, a class of eight parsimonious mixture models based on the mixtures of factor analyzers model are introduced. Variational Gaussian approximation is used for parameter estimation, and information criteria are used for model selection. The proposed models are explored in the context of clustering discrete data arising from RNA sequencing studies. Using real and simulated data, the models are shown to give favourable clustering performance. The GitHub R package for this work is available at https://github.com/anjalisilva/mixMPLNFA and is released under the open-source MIT license.},
  pubstate    = {prepublished},
  keywords    = {/unread,Statistics - Computation,Statistics - Machine Learning,Statistics - Methodology},
  annotation  = {Read\_Status: New\\
                 Read\_Status\_Date: 2025-07-02T09:31:47.579Z},
  file        = {/home/louis/snap/zotero-snap/common/Zotero/storage/BXVPEIDD/Payne et al. - 2023 - Finite Mixtures of Multivariate Poisson-Log Normal Factor Analyzers for Clustering Count Data.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/L5DAS5C2/2311.html}
 }
@unpublished{bystrovaCausalDiscovery,
  title      = {Causal Discovery},
  author     = {Bystrova, Daria},
  langid     = {english},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-07-02T09:34:39.476Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/NQE5DY92/Bystrova - Causal discovery.pdf}
 }
@online{braultFastConsistentAlgorithm2023,
  title       = {Fast and {{Consistent Algorithm}} for the {{Latent Block Model}}},
  author      = {Brault, Vincent and Channarond, Antoine},
  date        = {2023-03-09},
  eprint      = {1610.09005},
  eprinttype  = {arXiv},
  eprintclass = {math},
  doi         = {10.48550/arXiv.1610.09005},
  url         = {http://arxiv.org/abs/1610.09005},
  urldate     = {2025-07-09},
  abstract    = {The latent block model is used to simultaneously rank the rows and columns of a matrix to reveal a block structure. The algorithms used for estimation are often time consuming. However, recent work shows that the log-likelihood ratios are equivalent under the complete and observed (with unknown labels) models and the groups posterior distribution to converge as the size of the data increases to a Dirac mass located at the actual groups configuration. Based on these observations, the algorithm Largest Gaps is proposed in this paper to perform clustering using only the marginals of the matrix, when the number of blocks is very small with respect to the size of the whole matrix in the case of binary data. In addition, a model selection method is incorporated with a proof of its consistency. Thus, this paper shows that studying simplistic configurations (few blocks compared to the size of the matrix or very contrasting blocks) with complex algorithms is useless since the marginals already give very good parameter and classification estimates.},
  langid      = {english},
  pubstate    = {prepublished},
  keywords    = {/unread,Mathematics - Statistics Theory,Statistics - Computation,Statistics - Statistics Theory},
  annotation  = {Read\_Status: New\\
                 Read\_Status\_Date: 2025-07-09T13:58:53.533Z},
  file        = {/home/louis/snap/zotero-snap/common/Zotero/storage/5LXC6Y68/Brault et Channarond - 2023 - Fast and Consistent Algorithm for the Latent Block Model.pdf}
 }
@article{braultGeneralisationLalgorithmeLargest,
  title      = {Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique},
  author     = {Brault, Vincent and Channarond, Antoine and Robert, Valérie},
  abstract   = {The latent block model assumes there exists a distribution for each crossing between an object cluster and a variable cluster of a data table ; the cells are supposed to be independent conditionally to the choice of these clusters. To estimate the model parameters, most of algorithms are time consuming. Brault and Channarond (2016) proposed to adapt the Largest Gaps algorithm which consists in using the margins. They thus obtained a procedure which estimates all the model parameters consistently but requires a large number of observations. In this talk, we will extend the procedure to the case of any distribution having a second order moment by using an EM algorithm estimation.},
  langid     = {french},
  keywords   = {/unread},
  annotation = {Read\_Status: New\\
                Read\_Status\_Date: 2025-07-09T12:29:43.098Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/GIUNC4L3/Brault et al. - Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique.pdf}
 }
@article{channarondClassificationEstimationStochastic2012,
  title        = {Classification and Estimation in the {{Stochastic Blockmodel}} Based on the Empirical Degrees},
  author       = {Channarond, Antoine and Daudin, Jean-Jacques and Robin, Stéphane},
  date         = {2012-01-01},
  journaltitle = {Electronic Journal of Statistics},
  shortjournal = {Electron. J. Statist.},
  volume       = {6},
  publisher    = {Institute of Mathematical Statistics},
  issn         = {1935-7524},
  doi          = {10.1214/12-ejs753},
  url          = {https://projecteuclid.org/journals/electronic-journal-of-statistics/volume-6/issue-none/Classification-and-estimation-in-the-Stochastic-Blockmodel-based-on-the/10.1214/12-EJS753.full},
  urldate      = {2025-07-09},
  abstract     = {The Stochastic Blockmodel [16] is a mixture model for heterogeneous network data. Unlike the usual statistical framework, new nodes give additional information about the previous ones in this model. Thereby the distribution of the degrees concentrates in points conditionally on the node class. We show under a mild assumption that classification, estimation and model selection can actually be achieved with no more than the empirical degree data. We provide an algorithm able to process very large networks and consistent estimators based on it. In particular, we prove a bound of the probability of misclassification of at least one node, including when the number of classes grows.},
  issue        = {none},
  langid       = {english},
  keywords     = {/unread},
  annotation   = {Read\_Status: New\\
                  Read\_Status\_Date: 2025-07-09T13:59:33.921Z},
  file         = {/home/louis/snap/zotero-snap/common/Zotero/storage/8TL8AJ2G/Channarond et al. - 2012 - Classification and estimation in the Stochastic Blockmodel based on the empirical degrees.pdf}
 }
--- a/suivi/_metadata.yml
+++ b/suivi/_metadata.yml
@ -3,11 +3,16 @@
 # re-render posts only when a change to the source file is made ----
 freeze: auto
 author:
  name: Louis Lacoste
  email: louis.lacoste@agroparistech.fr
  affiliation: MIA Paris-Saclay, INRAE, AgroParisTech, Université Paris-Saclay
  orcid: 0009-0004-0178-9821
 # enable banner style title blocks ----
 title-block-banner: true
 crossref:
  custom:
    - kind: float
      reference-prefix: Item
      key: item
    - kind: float
      reference-prefix: Reference
      key: ref
 lightbox: true
--- a/template.qmd
+++ b/template.qmd
@ -1,18 +0,0 @@
 ---
 title: "Bilan semaine MM YYYY : dd-dd mois"
 format: 
    html:
        embed-resources: true
 ---
 ## A faire
 -
 ## J'ai fait
 -
 ## A continuer
 -
--- a/template.tar.gz
+++ b/template.tar.gz
Author	SHA1	Message	Date
Louis	350c1e7af8	Ajouts à projet phylo All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2026-05-11 14:04:39 +02:00
Louis	a95b686dd5	Contrastive learning All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2026-05-11 13:51:45 +02:00
Louis	46c72ff2f8	Ajout détails VGAE All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2026-05-11 13:45:28 +02:00
Louis	9d322580aa	Updates All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2026-05-07 18:09:08 +02:00
Louis	a6865af33e	Rem references.bib All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2026-05-07 16:01:57 +02:00
Louis	f4ff477719	Ajout appli colsbm	2026-05-07 16:01:47 +02:00
Louis	1a1adca08d	Removing dup macros All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2026-05-06 17:19:03 +02:00
Louis	809e008e0b	Working on phylogeny All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2026-05-06 17:11:25 +02:00
Louis	ee6ea17a0d	Changing name of section All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2026-05-06 14:18:10 +02:00
Louis	5ec0010732	Begin adding knowledge database	2026-05-06 14:18:01 +02:00
Louis	a608929fad	Adding knowledge base All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2026-05-06 10:31:16 +02:00
Louis	fddfeac25e	Ajout sem 12 2026 All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2026-03-16 10:18:18 +01:00
Louis	49582515cd	Adding notebook on benchmark for LBM sequential All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2026-02-24 17:37:51 +01:00
Louis	2ec236cf64	Add sem 9 2026 All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2026-02-24 14:30:24 +01:00
Louis	b92f76ce7d	Ajout fiche semaine 8 2026 All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2026-02-17 14:40:16 +01:00
Louis	e1e8c2cbdc	Use of full rank for X in identifiability	2026-02-17 14:39:00 +01:00
Louis	ce020fefd4	Adding correct tags	2026-02-17 11:15:34 +01:00
Louis	1262ed36e9	Identifiability final All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2026-02-17 11:13:40 +01:00
Louis	dfb97f8ef3	Identifiability	2026-02-16 16:25:49 +01:00
Louis	b5ddca507a	Adding sem 7 All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2026-02-10 16:16:20 +01:00
Louis	698190bae8	Recalling that the matrices are rectangular All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2026-02-09 10:18:57 +01:00
Louis	1b83ee9a41	Détails identif SBM covar groupe All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2026-02-09 09:36:07 +01:00
Louis	7888eed0e0	Add sem 26 6 All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2026-02-06 10:51:55 +01:00
Louis	16e5e57cef	Update sem 25 51	2026-02-06 10:51:45 +01:00
Louis	4a26113838	Ajout increasing All checks were successful ci/woodpecker/manual/woodpecker Pipeline was successful Details	2026-01-05 13:30:45 +01:00
Louis	1775721b6c	Corrections articles All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-12-23 13:50:33 +01:00
Louis	2ef05d204b	Détails sur les calculs pour SVEM Some checks failed ci/woodpecker/push/woodpecker Pipeline failed Details	2025-12-23 13:50:00 +01:00
Louis	09a7edd255	Ajout détails lbm sur données otus	2025-12-23 13:45:10 +01:00
Louis	2fa9438bcc	Détails hypo poisson	2025-12-23 13:43:02 +01:00
Louis	b011e6d08b	Ajout prob computationnels	2025-12-23 13:41:19 +01:00
Louis	b70506d3c6	Supp modèle 1 et ajout détails sur dual	2025-12-23 13:40:52 +01:00
Louis	e99b9374b2	cal L macro	2025-12-23 13:36:39 +01:00
Louis	961cfe22cf	Adding notes for 15/12 meeting All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-12-15 18:27:20 +01:00
Louis	c3c132920f	New macros	2025-12-15 18:27:06 +01:00
Louis	bc2d474a73	🍱 Adding results of freeze All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-12-15 17:48:55 +01:00
Louis	f696b84f9c	💬 Adding some feedback	2025-12-15 17:48:22 +01:00
Louis	2a96a5fab5	Change with S&J All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-12-15 14:03:51 +01:00
Louis	83f1f202a8	💬 Adding content for week 50 All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-12-15 13:30:57 +01:00
Louis	584580f029	🔧 Adding a common tex macros file to include	2025-12-15 13:30:04 +01:00
Louis	4dbc745461	Ajout UniFrac All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-11-07 15:54:23 +01:00
Louis	437bd3a09b	Sur l'inférence All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-11-07 14:40:18 +01:00
Louis	88ae4f0776	Stage Mona clustering Doré All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-11-07 14:13:10 +01:00
Louis	628260e5ae	Papier	2025-11-07 14:13:01 +01:00
Louis	0809cb6d65	Ajout semaine 45 All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-11-06 10:50:37 +01:00
Louis	81eb9f0182	Small changes	2025-11-03 16:03:20 +01:00
Louis	ddc7ff8e6e	En cours lecture LBM Julie All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-10-31 16:20:05 +01:00
Louis	ee00e6e792	Correct date All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-10-31 09:15:41 +01:00
Louis	7d9379f43d	Sem 44	2025-10-29 17:09:06 +01:00
Louis	16c4a93403	Fix typo in sem number	2025-10-29 16:57:34 +01:00
Louis	d74ece2fb4	Update	2025-10-23 17:36:04 +02:00
Louis	6b1d7f00ce	Update All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-10-22 13:55:45 +02:00
Louis	e305d6cbd6	BIC-L All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-10-06 16:09:57 +02:00
Louis	9161ba101e	Ajout All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-10-06 14:50:38 +02:00
Louis	389917df6b	Fix date and adding what to do All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-10-06 14:28:14 +02:00
Louis	232f67e797	Adding sem 41 All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-10-06 14:02:38 +02:00
Louis	495ae03acc	Fix date All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-10-06 11:19:00 +02:00
Louis	3d329f7f3a	Adding sem 38	2025-10-06 11:18:39 +02:00
Louis	d4c8e48a14	Fix date All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-08-26 10:56:57 +02:00
Louis	9b508cc881	Ajout semaine 35	2025-08-26 10:56:38 +02:00
Louis	5202f028a6	Adding easy16s All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-08-17 17:34:54 +02:00
Louis	ccb6919e4c	Ajout lecture Brault All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-08-14 16:27:41 +02:00
Louis	5c39792f7c	Fin lect intro VB All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-08-14 13:24:12 +02:00
Louis	cbeec14f7b	Ajout lecture hdr v brault All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-08-14 11:20:17 +02:00
Louis	1824a9ca8c	Ajout avancées All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-08-13 20:09:03 +02:00
Louis	9e32b94c01	Semaine 33, résultats simus missing links All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-08-11 12:30:57 +02:00
Louis	4d3f697fc6	Lightbox for images	2025-08-11 12:30:38 +02:00
Louis	21f11bde24	Modification après réunion du 15 juillet 2025 All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-07-15 17:36:46 +02:00
Louis	62201e5eea	Semaine 29 Tableau avec sep All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-07-15 14:56:58 +02:00
Louis	19ce0509a9	Création sem 29 All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-07-15 14:32:57 +02:00
Louis	2696c18994	Ajout sem 28	2025-07-15 14:32:44 +02:00
Louis	f192001150	Generalized Random Dot Product All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-07-11 14:28:47 +02:00
Louis	5bb6618ea3	Ajout formule simple All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-07-11 14:26:53 +02:00
Louis	a477053506	Largest gaps	2025-07-09 16:03:24 +02:00
Louis	c63909f1e8	Ajout formules delta additif All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-07-09 11:26:39 +02:00
Louis	d780576408	Description delta All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-07-08 11:04:26 +02:00
Louis	f118bb0bb4	Collapsed calendar All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-07-08 10:55:28 +02:00
Louis	4803f5f831	Ajout réunion S&D All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-07-07 16:37:03 +02:00
Louis	aa9c40c310	Ajout sem 28 All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-07-07 13:30:34 +02:00
Louis	2f5525a507	Ajout de sem 27 All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-07-07 13:28:59 +02:00
Louis	9cf318771c	Titles All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-07-02 17:27:45 +02:00
Louis	fe00b97f47	Adding icon for journals All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-07-02 12:03:15 +02:00
Louis	2fae04dd91	Ajout lien dépôt git All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-07-02 12:02:00 +02:00
Louis	e29afb93de	Ajout agenda All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-07-02 11:41:11 +02:00
Louis	e1ea12a38f	Ajout modifications suite réunion Sophie All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-07-02 11:38:33 +02:00
Louis	8fab3f726a	Fix indentation All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-07-01 14:54:11 +02:00
Louis	1629798295	Fixing itemize All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-07-01 10:08:39 +02:00
Louis	78e4a86700	Mise à jour lectures et discussions de la semaine. All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-07-01 09:58:53 +02:00
Louis	510e6fe964	Using date-modified	2025-07-01 09:58:34 +02:00
Louis	f33f46fcc9	Ajout lecture train All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-06-30 16:19:39 +02:00
Louis	6413d01870	Ajout semaine 27 All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-06-30 11:39:45 +02:00
Louis	23901a35d1	Precision matchado	2025-06-30 11:39:35 +02:00
Louis	44e04ff2e4	Ajout fin de lecture matchado All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-06-30 11:30:15 +02:00
Louis	3fec342474	Ajout tableau comparais auc All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-06-20 12:50:53 +02:00
Louis	eff1a808f2	Date of sem 25 All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-06-17 11:59:59 +02:00
Louis	3f743d2009	Fixing callout block All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-06-17 10:11:23 +02:00
Louis	b2f6f657f5	Ajout résultats finaux clustering d&a simulations All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-06-17 10:08:23 +02:00
Louis	95e3639c3f	Fin de journée All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-06-16 18:36:14 +02:00
Louis	4795b8924f	ajout vérif BICL d&a All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-06-16 18:22:49 +02:00
Louis	c60ba3fc09	Adding current state for week 25 All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-06-16 18:20:38 +02:00
Louis	076d31fb75	Updating ari clustering d&a All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-06-16 14:04:42 +02:00
Louis	7209e9004d	Intro git All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-06-16 09:14:10 +02:00
Louis	de5760047b	Ajout retours sur descending and ascending and more exploration All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-06-15 21:08:26 +02:00
Louis	f2ecf635ba	Ajout infos sur le clustering descending and ascending All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-06-15 20:43:25 +02:00
Louis	0ddc44022b	Ajouts du jour All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-06-11 18:33:55 +02:00
Louis	33f894449f	intro git All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-06-10 16:02:41 +02:00
Louis	7acb4d35fc	small fix All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-06-10 14:41:11 +02:00
Louis	f188b7544c	Ajout semaine 24 All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-06-10 12:00:49 +02:00
Louis	0a80581f1c	Changing date All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-05-28 15:37:38 +02:00
Louis	0266a6a7dc	Ajout sem 22 All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-05-28 11:38:22 +02:00
Louis	a3aec50618	21 up	2025-05-28 11:38:00 +02:00
Louis	6069e8e276	Semaine 21	2025-05-28 11:36:58 +02:00
Louis	ec0b6160ac	Ajout réu 19 mai All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-05-19 17:58:29 +02:00
Louis	af550ea727	Last 20 All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-05-19 14:19:28 +02:00
Louis	05a2acf92b	Ajout 15/05/2025 All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-05-15 22:12:09 +02:00
Louis	ad8183faba	ajout tâches après discussion JA PB SD All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-05-13 22:31:24 +02:00
Louis	d85b19b95f	Ajout lien présentation All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-05-13 11:13:01 +02:00
Louis	08a9de982a	Ajout aujd All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-05-12 10:34:51 +02:00
Louis	5cd1e6bffa	ML@Aussois ✔️ All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-05-09 13:59:55 +02:00
Louis	852b74fd4e	zaer All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-05-07 14:09:57 +02:00
Louis	2752268256	Màj All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-05-07 13:53:54 +02:00
Louis	2eee4d8344	Update CSI All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-05-06 17:13:40 +02:00
Louis	626fe7acb7	Ajout res clustering exhaustif pirho All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-05-06 14:45:47 +02:00
Louis	1f827120bb	Ajout All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-05-06 14:16:56 +02:00
Louis	88fe6143fa	Ajout changements du jour All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-05-06 13:49:05 +02:00
Louis	d430925457	Ajout Papier All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-05-05 17:35:27 +02:00
Louis	a5e765f30d	fa All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-05-05 17:25:11 +02:00
Louis	5fde5f5d2c	Remove non present image All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-05-05 17:16:22 +02:00
Louis	2ef273a260	Ajout All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-05-05 17:12:37 +02:00
Louis	035c7e517f	Fix date All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-04-28 11:48:47 +02:00
Louis	385b233468	Ajout fichier semaine 18 All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-04-28 11:44:52 +02:00
Louis	3378565614	Et finito All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-04-25 16:44:42 +02:00
Louis	812708613b	Adding badge [CI SKIP]	2025-04-25 16:42:51 +02:00
Louis	7a6623591e	Disable embed-resources All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-04-25 16:41:44 +02:00
Louis	d933c8c55f	Fix CI All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-04-25 16:39:48 +02:00
Louis	3007772b87	Removing template All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2025-04-25 15:52:27 +02:00
`@ -1,2 +1,3 @@`
	`# these-recap-hebdo`	`# these-recap-hebdo`

		`![Build status](https://woodpecker.polarolouis.fr/api/badges/4/status.svg)`