Compare commits
135 commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
350c1e7af8 | ||
|
|
a95b686dd5 | ||
|
|
46c72ff2f8 | ||
|
|
9d322580aa | ||
|
|
a6865af33e | ||
|
|
f4ff477719 | ||
|
|
1a1adca08d | ||
|
|
809e008e0b | ||
|
|
ee6ea17a0d | ||
|
|
5ec0010732 | ||
|
|
a608929fad | ||
|
|
fddfeac25e | ||
|
|
49582515cd | ||
|
|
2ec236cf64 | ||
|
|
b92f76ce7d | ||
|
|
e1e8c2cbdc | ||
|
|
ce020fefd4 | ||
|
|
1262ed36e9 | ||
|
|
dfb97f8ef3 | ||
|
|
b5ddca507a | ||
|
|
698190bae8 | ||
|
|
1b83ee9a41 | ||
|
|
7888eed0e0 | ||
|
|
16e5e57cef | ||
|
|
4a26113838 | ||
|
|
1775721b6c | ||
|
|
2ef05d204b | ||
|
|
09a7edd255 | ||
|
|
2fa9438bcc | ||
|
|
b011e6d08b | ||
|
|
b70506d3c6 | ||
|
|
e99b9374b2 | ||
|
|
961cfe22cf | ||
|
|
c3c132920f | ||
|
|
bc2d474a73 | ||
|
|
f696b84f9c | ||
|
|
2a96a5fab5 | ||
|
|
83f1f202a8 | ||
|
|
584580f029 | ||
|
|
4dbc745461 | ||
|
|
437bd3a09b | ||
|
|
88ae4f0776 | ||
|
|
628260e5ae | ||
|
|
0809cb6d65 | ||
|
|
81eb9f0182 | ||
|
|
ddc7ff8e6e | ||
|
|
ee00e6e792 | ||
|
|
7d9379f43d | ||
|
|
16c4a93403 | ||
|
|
d74ece2fb4 | ||
|
|
6b1d7f00ce | ||
|
|
e305d6cbd6 | ||
|
|
9161ba101e | ||
|
|
389917df6b | ||
|
|
232f67e797 | ||
|
|
495ae03acc | ||
|
|
3d329f7f3a | ||
|
|
d4c8e48a14 | ||
|
|
9b508cc881 | ||
|
|
5202f028a6 | ||
|
|
ccb6919e4c | ||
|
|
5c39792f7c | ||
|
|
cbeec14f7b | ||
|
|
1824a9ca8c | ||
|
|
9e32b94c01 | ||
|
|
4d3f697fc6 | ||
|
|
21f11bde24 | ||
|
|
62201e5eea | ||
|
|
19ce0509a9 | ||
|
|
2696c18994 | ||
|
|
f192001150 | ||
|
|
5bb6618ea3 | ||
|
|
a477053506 | ||
|
|
c63909f1e8 | ||
|
|
d780576408 | ||
|
|
f118bb0bb4 | ||
|
|
4803f5f831 | ||
|
|
aa9c40c310 | ||
|
|
2f5525a507 | ||
|
|
9cf318771c | ||
|
|
fe00b97f47 | ||
|
|
2fae04dd91 | ||
|
|
e29afb93de | ||
|
|
e1ea12a38f | ||
|
|
8fab3f726a | ||
|
|
1629798295 | ||
|
|
78e4a86700 | ||
|
|
510e6fe964 | ||
|
|
f33f46fcc9 | ||
|
|
6413d01870 | ||
|
|
23901a35d1 | ||
|
|
44e04ff2e4 | ||
|
|
3fec342474 | ||
|
|
eff1a808f2 | ||
|
|
3f743d2009 | ||
|
|
b2f6f657f5 | ||
|
|
95e3639c3f | ||
|
|
4795b8924f | ||
|
|
c60ba3fc09 | ||
|
|
076d31fb75 | ||
|
|
7209e9004d | ||
|
|
de5760047b | ||
|
|
f2ecf635ba | ||
|
|
0ddc44022b | ||
|
|
33f894449f | ||
|
|
7acb4d35fc | ||
|
|
f188b7544c | ||
|
|
0a80581f1c | ||
|
|
0266a6a7dc | ||
|
|
a3aec50618 | ||
|
|
6069e8e276 | ||
|
|
ec0b6160ac | ||
|
|
af550ea727 | ||
|
|
05a2acf92b | ||
|
|
ad8183faba | ||
|
|
d85b19b95f | ||
|
|
08a9de982a | ||
|
|
5cd1e6bffa | ||
|
|
852b74fd4e | ||
|
|
2752268256 | ||
|
|
2eee4d8344 | ||
|
|
626fe7acb7 | ||
|
|
1f827120bb | ||
|
|
88fe6143fa | ||
|
|
d430925457 | ||
|
|
a5e765f30d | ||
|
|
5fde5f5d2c | ||
|
|
2ef273a260 | ||
|
|
035c7e517f | ||
|
|
385b233468 | ||
|
|
3378565614 | ||
|
|
812708613b | ||
|
|
7a6623591e | ||
|
|
d933c8c55f | ||
|
|
3007772b87 |
1
.gitignore
vendored
|
|
@ -311,3 +311,4 @@ TSWLatexianTemp*
|
||||||
/.quarto/
|
/.quarto/
|
||||||
|
|
||||||
public/
|
public/
|
||||||
|
/.luarc.json
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,8 @@
|
||||||
|
clone:
|
||||||
|
git:
|
||||||
|
image: woodpeckerci/plugin-git
|
||||||
|
branch: develop
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
render-site:
|
render-site:
|
||||||
image: ghcr.io/quarto-dev/quarto:1.7.22
|
image: ghcr.io/quarto-dev/quarto:1.7.22
|
||||||
|
|
@ -7,7 +12,7 @@ steps:
|
||||||
when:
|
when:
|
||||||
event: [push, pull_request, cron, manual]
|
event: [push, pull_request, cron, manual]
|
||||||
branch:
|
branch:
|
||||||
- main
|
- develop
|
||||||
# Push le contenu du dossier public sur le dépôt `pages` de git.polarolouis.fr
|
# Push le contenu du dossier public sur le dépôt `pages` de git.polarolouis.fr
|
||||||
# On utilise l'image alpine/git pour avoir git et ssh
|
# On utilise l'image alpine/git pour avoir git et ssh
|
||||||
|
|
||||||
|
|
@ -16,23 +21,21 @@ steps:
|
||||||
commands:
|
commands:
|
||||||
- git config --global user.name "Woodpecker CI"
|
- git config --global user.name "Woodpecker CI"
|
||||||
- git config --global user.email "git@polarolouis.fr"
|
- git config --global user.email "git@polarolouis.fr"
|
||||||
- git clone -b pages "https://$${ACCESS_TOKEN}@git.polarolouis.fr/polarolouis/these-recap-hebdo.git" $DESTINATION
|
- git clone -b main "https://$${ACCESS_TOKEN}@git.polarolouis.fr/polarolouis/these-recap-hebdo.git" $DESTINATION
|
||||||
- rm -rf $DESTINATION/* && echo "Cleaned $DESTINATION" || echo "Failed to clean $DESTINATION"
|
- rm -rf $DESTINATION/* && echo "Cleaned $DESTINATION" || echo "Failed to clean $DESTINATION"
|
||||||
- cp -ar $CI_WORKSPACE/public/* $DESTINATION/
|
- cp -ar $CI_WORKSPACE/public/* $DESTINATION/
|
||||||
- cd $DESTINATION
|
- cd $DESTINATION
|
||||||
- ls -la
|
- ls -la
|
||||||
- git add --all
|
- git add --all
|
||||||
- git commit -m "Deploy site ${CI_BUILD_CREATED} [CI SKIP]" || echo "Nothing to commit"
|
- git commit -m "Deploy site $CI_BUILD_CREATED [CI SKIP]" || echo "Nothing to commit"
|
||||||
- git push && echo "Pushed to $DESTINATION" || echo "Failed to push to $DESTINATION"
|
- git push && echo "Pushed to $DESTINATION" || echo "Failed to push to $DESTINATION"
|
||||||
environment:
|
environment:
|
||||||
ACCESS_TOKEN:
|
ACCESS_TOKEN:
|
||||||
from_secret: access_token
|
from_secret: access_token
|
||||||
DESTINATION: pages
|
DESTINATION: pages
|
||||||
when:
|
when:
|
||||||
event:
|
event: [push, pull_request, cron, manual]
|
||||||
- push
|
|
||||||
- pull_request
|
|
||||||
branch:
|
branch:
|
||||||
- main
|
- develop
|
||||||
depends_on:
|
depends_on:
|
||||||
- render-site
|
- render-site
|
||||||
|
|
|
||||||
|
|
@ -1,2 +1,3 @@
|
||||||
# these-recap-hebdo
|
# these-recap-hebdo
|
||||||
|
|
||||||
|

|
||||||
|
|
|
||||||
7
_freeze/site_libs/clipboard/clipboard.min.js
vendored
Normal file
2
_freeze/site_libs/quarto-listing/list.min.js
vendored
Normal file
243
_freeze/site_libs/quarto-listing/quarto-listing.js
Normal file
|
|
@ -0,0 +1,243 @@
|
||||||
|
// Attribute that holds the deferred image URL until the image is rendered.
const kProgressiveAttr = "data-src";
// Set to true at the end of the DOMContentLoaded handler, once the category
// click handlers have been attached.
let categoriesLoaded = false;

/**
 * Global entry point for selecting a listing category.
 * Does nothing until the category handlers have been attached.
 *
 * @param {string} category - Category to activate and record in the URL hash.
 */
window.quartoListingCategory = (category) => {
  if (!categoriesLoaded) {
    return;
  }
  activateCategory(category);
  setCategoryHash(category);
};
|
||||||
|
|
||||||
|
/**
 * Callback exposed on `window` under "quarto-listing-loaded".
 * Restores the state described by the URL hash (category, per-listing page),
 * then wires every listing in window["quarto-listings"].
 * NOTE(review): presumably invoked by the page once window["quarto-listings"]
 * has been populated — confirm against the caller.
 */
window["quarto-listing-loaded"] = () => {
  // Process any existing hash: the category first.
  const hash = getHash();
  if (hash && hash.category) {
    activateCategory(hash.category);
  }

  const listings = window["quarto-listings"];
  const listingIds = Object.keys(listings);

  // Then paginate each listing that has a page recorded in the hash.
  if (hash) {
    listingIds.forEach((listingId) => {
      const page = hash[getListingPageKey(listingId)];
      if (page) {
        showPage(listingId, page);
      }
    });
  }

  listingIds.forEach((listingId) => {
    const list = listings[listingId];

    // Attach the pagination handlers and render images already visible.
    refreshPaginationHandlers(listingId);
    renderVisibleProgressiveImages(list);

    // On every list update, render newly visible images, re-attach the
    // pagination handlers (deferred with setTimeout) and show or hide the
    // "no matching items" message.
    list.on("updated", () => {
      renderVisibleProgressiveImages(list);
      setTimeout(() => refreshPaginationHandlers(listingId));
      toggleNoMatchingMessage(list);
    });
  });
};
|
||||||
|
|
||||||
|
// Once the DOM is parsed, attach the category click handlers and mark the
// categories as loaded.
window.document.addEventListener("DOMContentLoaded", () => {
  // Activates a category and records it in the URL hash.
  const selectCategory = (category) => {
    activateCategory(category);
    setCategoryHash(category);
  };

  // Each category element selects the category named by its data-category.
  const categoryEls = window.document.querySelectorAll(
    ".quarto-listing-category .category"
  );
  for (const categoryEl of categoryEls) {
    const category = categoryEl.getAttribute("data-category");
    categoryEl.onclick = () => {
      selectCategory(category);
    };
  }

  // Clicking a category title selects the empty category.
  // (There should be only one title, but the class may match several.)
  const titleEls = window.document.querySelectorAll(
    ".quarto-listing-category-title"
  );
  for (const titleEl of titleEls) {
    titleEl.onclick = () => {
      selectCategory("");
    };
  }

  categoriesLoaded = true;
});
|
||||||
|
|
||||||
|
/**
 * Shows the ".listing-no-matching" element of a listing when the list has no
 * visible items, and hides it (class "d-none") otherwise.
 *
 * @param {object} list - Reads `list.listContainer.id` and `list.visibleItems`.
 */
function toggleNoMatchingMessage(list) {
  const messageEl = window.document.querySelector(
    `#${list.listContainer.id} .listing-no-matching`
  );
  if (!messageEl) {
    return;
  }

  const classes = messageEl.classList;
  if (list.visibleItems.length === 0) {
    classes.remove("d-none");
    return;
  }
  if (!classes.contains("d-none")) {
    classes.add("d-none");
  }
}
|
||||||
|
|
||||||
|
/**
 * Replaces the URL hash with one that holds only the given category.
 *
 * @param {string} category - Category to store under the "category" key.
 */
function setCategoryHash(category) {
  const hashState = { category: category };
  setHash(hashState);
}
|
||||||
|
|
||||||
|
/**
 * Records the current page of a listing in the URL hash, keeping the other
 * entries already present in the hash.
 *
 * @param {string} listingId - Listing whose page is recorded.
 * @param {string} page - Page value to store.
 */
function setPageHash(listingId, page) {
  const previous = getHash() || {};
  const pageKey = getListingPageKey(listingId);
  const next = Object.assign({}, previous);
  next[pageKey] = page;
  setHash(next);
}
|
||||||
|
|
||||||
|
/**
 * Builds the hash key under which a listing's page is stored.
 *
 * @param {string} listingId - Listing identifier.
 * @returns {string} The identifier followed by "-page".
 */
function getListingPageKey(listingId) {
  const suffix = "page";
  return `${listingId}-${suffix}`;
}
|
||||||
|
|
||||||
|
/**
 * Attaches click handlers to the enabled pagination links of a listing.
 * Each handler records the page in the URL hash, shows that page and returns
 * false.
 *
 * Does nothing when no element with the given id exists, instead of throwing.
 *
 * @param {string} listingId - Id of the listing element.
 */
function refreshPaginationHandlers(listingId) {
  const listingEl = window.document.getElementById(listingId);
  if (!listingEl) {
    return;
  }

  const paginationEls = listingEl.querySelectorAll(
    ".pagination li.page-item:not(.disabled) .page.page-link"
  );
  for (const paginationEl of paginationEls) {
    paginationEl.onclick = () => {
      // Read the page from the link itself: the event target may be a
      // descendant of the link that carries no "data-i" attribute.
      const page = paginationEl.getAttribute("data-i");
      setPageHash(listingId, page);
      showPage(listingId, page);
      return false;
    };
  }
}
|
||||||
|
|
||||||
|
/**
 * For every visible item of the list, copies the deferred URL of each image
 * (attribute named by kProgressiveAttr) into "src", then removes the deferred
 * attribute so the image is not processed again.
 *
 * @param {object} list - Reads `list.visibleItems`; each item exposes `elm`.
 */
function renderVisibleProgressiveImages(list) {
  const deferredSelector = `img[${kProgressiveAttr}]`;
  for (const visibleItem of list.visibleItems) {
    const el = visibleItem.elm;
    if (!el) {
      continue;
    }
    for (const img of el.querySelectorAll(deferredSelector)) {
      const deferredSrc = img.getAttribute(kProgressiveAttr);
      if (deferredSrc) {
        img.setAttribute("src", deferredSrc);
      }
      // The attribute is removed even when it held no usable value.
      img.removeAttribute(kProgressiveAttr);
    }
  }
}
|
||||||
|
|
||||||
|
/**
 * Reads the hash of the current URL and parses it.
 * Hashes are of the form #name=value&name1=value1&name2=value2.
 *
 * @returns {object|undefined} The result of parseHash; parseHash receives
 *   undefined when the URL has no hash.
 */
function getHash() {
  const locationUrl = new URL(window.location);
  const fragment = locationUrl.hash;
  // Drop the leading "#".
  const hashRaw = fragment ? fragment.slice(1) : undefined;
  return parseHash(hashRaw);
}
|
||||||
|
|
||||||
|
// Separator between the entries of a hash, and between a name and its value.
const kAnd = "&";
const kEquals = "=";

// Percent-decodes one hash component; a malformed escape sequence leaves the
// text unchanged instead of throwing.
function decodeHashComponent(text) {
  try {
    return decodeURIComponent(text);
  } catch (_err) {
    return text;
  }
}

/**
 * Parses a hash string of the form name=value&name1=value1 into an object.
 *
 * Names and values are percent-decoded. Entries without "=" are ignored; the
 * value is everything after the first "=".
 *
 * @param {string|undefined} hash - Hash without its leading "#".
 * @returns {object|undefined} Name/value map, or undefined for an empty hash.
 */
function parseHash(hash) {
  if (!hash) {
    return undefined;
  }

  const hashObj = {};
  for (const entry of hash.split(kAnd)) {
    const separatorAt = entry.indexOf(kEquals);
    if (separatorAt === -1) {
      continue;
    }
    const name = decodeHashComponent(entry.slice(0, separatorAt));
    const value = entry.slice(separatorAt + kEquals.length);
    hashObj[name] = decodeHashComponent(value);
  }
  return hashObj;
}

/**
 * Serializes an object into a hash string of the form name=value&name1=value1.
 *
 * Names and values are percent-encoded so that "&", "=" and "%" inside them
 * survive a round trip through parseHash.
 *
 * @param {object} obj - Name/value map.
 * @returns {string} Hash string without the leading "#".
 */
function makeHash(obj) {
  return Object.keys(obj)
    .map((key) => {
      return `${encodeURIComponent(key)}${kEquals}${encodeURIComponent(obj[key])}`;
    })
    .join(kAnd);
}
|
||||||
|
|
||||||
|
/**
 * Serializes the object with makeHash and pushes it as the URL hash through
 * the History API.
 *
 * @param {object} obj - Name/value map to store in the hash.
 */
function setHash(obj) {
  const serialized = makeHash(obj);
  const fragment = `#${serialized}`;
  window.history.pushState(null, null, fragment);
}
|
||||||
|
|
||||||
|
/**
 * Shows a given page of a listing.
 *
 * The page may come from the URL hash, so it is validated first: anything
 * that is not a whole number >= 1 is ignored rather than passed to the list
 * as NaN or a negative index. Unknown listings are ignored as well.
 *
 * @param {string} listingId - Key of the listing in window["quarto-listings"].
 * @param {string|number} page - 1-based page number.
 */
function showPage(listingId, page) {
  const list = window["quarto-listings"][listingId];
  if (!list) {
    return;
  }

  const pageNumber = Number(page);
  if (!Number.isInteger(pageNumber) || pageNumber < 1) {
    return;
  }

  // NOTE(review): `list.page` is presumably the page size and `show` takes
  // (index of the first item, item count) — confirm against List.js.
  list.show((pageNumber - 1) * list.page + 1, list.page);
}
|
||||||
|
|
||||||
|
/**
 * Marks one category element as active and filters the listings to it.
 *
 * The matching element is found by comparing its "data-category" attribute
 * with the category, not by building a CSS selector from it: a category that
 * contains quotes or brackets would otherwise produce an invalid selector.
 * As with a selector lookup, only the first matching element is activated.
 *
 * @param {string} category - Category to activate; "" clears the filter.
 */
function activateCategory(category) {
  const wanted = String(category);
  const categoryEls = window.document.querySelectorAll(
    ".quarto-listing-category .category"
  );

  let activated = false;
  for (const categoryEl of categoryEls) {
    // Deactivate every category before activating the requested one.
    categoryEl.classList.remove("active");
    if (!activated && categoryEl.getAttribute("data-category") === wanted) {
      categoryEl.classList.add("active");
      activated = true;
    }
  }

  // Filter the listings to this category
  filterListingCategory(category);
}
|
||||||
|
|
||||||
|
/**
 * Filters every listing in window["quarto-listings"] to the given category.
 *
 * An empty category resets the filter. Otherwise an item is kept when the
 * comma-separated `categories` value of the item contains the category.
 * Items whose `categories` value is not a string (null, undefined, ...) are
 * filtered out instead of raising a TypeError.
 *
 * @param {string} category - Category to keep; "" resets the filter.
 */
function filterListingCategory(category) {
  const listings = window["quarto-listings"];
  for (const listingId of Object.keys(listings)) {
    const list = listings[listingId];
    if (!list) {
      continue;
    }

    if (category === "") {
      // resets the filter
      list.filter();
      continue;
    }

    // filter to this category
    list.filter((item) => {
      const itemCategories = item.values().categories;
      if (typeof itemCategories !== "string") {
        return false;
      }
      return itemCategories.split(",").includes(category);
    });
  }
}
|
||||||
15
_freeze/suivi/2025-50/2025-50/execute-results/html.json
Normal file
11
_macros.tex
Normal file
|
|
@ -0,0 +1,11 @@
|
||||||
|
\newcommand{\ELBO}[2]{\mathcal{J}(#1,#2)}
|
||||||
|
\newcommand{\R}{\mathcal{R}}
|
||||||
|
\newcommand{\ELBORTheta}{\ELBO{\R}{\pmb{\theta}}}
|
||||||
|
\newcommand{\Var}{\mathbb{V}}
|
||||||
|
\newcommand{\Esp}{\mathbb{E}}
|
||||||
|
\newcommand{\Prob}{\mathbb{P}}
|
||||||
|
\newcommand{\calL}{\mathcal{L}}
|
||||||
|
\newcommand{\Normal}{\mathcal{N}}
|
||||||
|
\DeclareMathOperator{\ilr}{ilr}
|
||||||
|
\DeclareMathOperator{\clr}{clr}
|
||||||
|
\DeclareMathOperator{\Cat}{Cat}
|
||||||
20
_quarto.yml
|
|
@ -2,24 +2,36 @@ project:
|
||||||
type: website
|
type: website
|
||||||
output-dir: public
|
output-dir: public
|
||||||
|
|
||||||
|
toc: true
|
||||||
|
number-sections: true
|
||||||
|
|
||||||
website:
|
website:
|
||||||
title: "Suivi de la thèse"
|
title: "Suivi de la thèse"
|
||||||
navbar:
|
navbar:
|
||||||
left:
|
left:
|
||||||
- href: index.qmd
|
- icon: journals
|
||||||
|
href: index.qmd
|
||||||
text: "Liste des semaines"
|
text: "Liste des semaines"
|
||||||
|
right:
|
||||||
|
- icon: git
|
||||||
|
href: https://git.polarolouis.fr/polarolouis/these-recap-hebdo
|
||||||
|
aria-label: Dépôt Git du journal
|
||||||
|
|
||||||
lang: fr
|
lang: fr
|
||||||
|
|
||||||
date: last-modified
|
date: last-modified
|
||||||
|
date-modified: last-modified
|
||||||
|
|
||||||
author:
|
author:
|
||||||
name: "Louis LACOSTE"
|
name: Louis Lacoste
|
||||||
email: "louis.lacoste@agroparistech.fr"
|
email: louis.lacoste@agroparistech.fr
|
||||||
|
affiliation: MIA Paris-Saclay, INRAE, AgroParisTech, Université Paris-Saclay
|
||||||
|
orcid: 0009-0004-0178-9821
|
||||||
|
github: Polarolouis
|
||||||
|
|
||||||
format:
|
format:
|
||||||
html:
|
html:
|
||||||
theme: yeti
|
theme: yeti
|
||||||
toc: true
|
toc: true
|
||||||
html-math-method: katex
|
html-math-method: katex
|
||||||
embed-resources: true
|
embed-resources: false
|
||||||
20
index.qmd
|
|
@ -1,8 +1,28 @@
|
||||||
---
|
---
|
||||||
title: "Journal suivi de la thèse"
|
title: "Journal suivi de la thèse"
|
||||||
listing:
|
listing:
|
||||||
|
- id: journal-these
|
||||||
contents: suivi
|
contents: suivi
|
||||||
type: default
|
type: default
|
||||||
sort: "date desc"
|
sort: "date desc"
|
||||||
categories: true
|
categories: true
|
||||||
|
page-size: 5
|
||||||
|
- id: knowledge-base
|
||||||
|
contents: knowledge_base
|
||||||
|
type: default
|
||||||
|
sort: "date desc"
|
||||||
|
categories: true
|
||||||
---
|
---
|
||||||
|
|
||||||
|
::: {.callout-note icon="false" collapse="true"}
|
||||||
|
## Agenda
|
||||||
|
<iframe src="https://calendar.google.com/calendar/embed?height=400&wkst=2&ctz=Europe%2FParis&showPrint=0&mode=AGENDA&src=NTc4ZDI5ZGIwZmFiMGZjZjk1ZWM2NjQ4OWFjYTFmYzkxNzAyMGU2ODk1YjRmMTQ1NjA1YTRlMWU0MzU3N2FkOUBncm91cC5jYWxlbmRhci5nb29nbGUuY29t&color=%234285f4" style="border:solid 1px #777" width="100%" height="400" frameborder="0" scrolling="no"></iframe>
|
||||||
|
:::
|
||||||
|
|
||||||
|
## Base de connaissances et trucs en vrac
|
||||||
|
:::{#knowledge-base}
|
||||||
|
:::
|
||||||
|
|
||||||
|
## Journaux
|
||||||
|
:::{#journal-these}
|
||||||
|
:::
|
||||||
|
|
|
||||||
3
knowledge_base/_metadata.yml
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
categories: []
|
||||||
|
date: last-modified
|
||||||
|
date-modified: last-modified
|
||||||
34
knowledge_base/colsbm_application_reseaux_et_agri.qmd
Normal file
|
|
@ -0,0 +1,34 @@
|
||||||
|
---
|
||||||
|
title: "Pour application du modèle colBiSBM sur données interaction PP et pratiques agricoles"
|
||||||
|
categories: [application, agricole, graphe, collection, lbm, sbm]
|
||||||
|
---
|
||||||
|
{{< include /_macros.tex >}}
|
||||||
|
|
||||||
|
# Idée de l'application
|
||||||
|
|
||||||
|
En discutant avec Alizée, et grâce aux ressources données par Jean (voir la [section « Liens »](#liens)), il semble possible d'étudier l'impact des pratiques agricoles autour des espaces de pollinisation sur la structure des réseaux plantes-pollinisateurs.
|
||||||
|
|
||||||
|
# Point à éclaircir
|
||||||
|
|
||||||
|
1. Quels réseaux plantes-pollinisateurs choisir, où les trouver ? Besoin de réseaux en France pour la facilité.
|
||||||
|
2. Faut-il utiliser les covariables seulement de manière *post-hoc* pour corréler avec le *clustering* de réseaux obtenus ?
|
||||||
|
3. Comment encoder les covariables ?
|
||||||
|
- Est-ce que je les mets sous forme de pourcentage dans un *buffer* (quel rayon ?) comme Jean ? Alors problèmes inhérents aux données compositionnelles mais facilité d'exécution ?
|
||||||
|
- Quelle distance considérer pour l'impact des pratiques agricoles ? Cette distance est-elle variable selon le pollinisateur en soi ? Besoin de connaissances expertes.
|
||||||
|
- Besoin d'homogénéiser les échelles ? Ou a minima d'en choisir une ou plusieurs à considérer pour les covariables ?
|
||||||
|
- Gestion de gros tableaux de données pas simple.
|
||||||
|
4. **Le temps ???**
|
||||||
|
|
||||||
|
# Liens
|
||||||
|
|
||||||
|
CORINE Land Cover et extraction en R
|
||||||
|
Très gros grain :
|
||||||
|
<https://fr.wikipedia.org/wiki/Corine_Land_Cover> et le package de Jean pour l'extraction des *buffers* de types d'utilisation des sols :
|
||||||
|
<https://github.com/jean-cohen/corine.land.cover.landuse.extraction>
|
||||||
|
|
||||||
|
Les cartes de données :
|
||||||
|
|
||||||
|
- Carte du Bio et des types de cultures échelle parcelle : <https://www.agencebio.org/cartobio/>
|
||||||
|
- Échelle code postal, achat de phytosanitaires : <https://ventes-produits-phytopharmaceutiques.eaufrance.fr/>
|
||||||
|
- Thèse de Milena Cairo, classification des parcelles selon les pratiques en pesticides : <https://theses.hal.science/tel-05038286>
|
||||||
|
- Recensement des parcelles et du type de culture : <https://cartes.gouv.fr/rechercher-une-donnee/dataset/IGNF_RPG?redirected_from=geoservices.ign.fr>
|
||||||
BIN
knowledge_base/figs/projets-phylo/dag-simple.pdf
Normal file
54
knowledge_base/figs/projets-phylo/dag-simple.tex
Normal file
|
|
@ -0,0 +1,54 @@
|
||||||
|
\documentclass{standalone}
|
||||||
|
|
||||||
|
\usepackage{tikz}
|
||||||
|
|
||||||
|
\usetikzlibrary{positioning,shapes.arrows, arrows.meta,shapes.geometric}
|
||||||
|
\begin{document}
|
||||||
|
\begin{tikzpicture}
|
||||||
|
\tikzset{
|
||||||
|
every path/.append style = {
|
||||||
|
arrows = ->,
|
||||||
|
> = stealth,
|
||||||
|
},
|
||||||
|
every node/.append style = {
|
||||||
|
shape = circle,
|
||||||
|
draw = black,
|
||||||
|
minimum size=3em
|
||||||
|
},
|
||||||
|
latent/.style = {
|
||||||
|
fill = lightgray
|
||||||
|
},
|
||||||
|
prior/.style = {
|
||||||
|
fill = red
|
||||||
|
},
|
||||||
|
moral/.style = {
|
||||||
|
dashed,
|
||||||
|
> = {}, % remove arrow tip
|
||||||
|
arrows = -, % ensure no arrows
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
\node (y) {$Y$};
|
||||||
|
\node[latent] (z) [above left = of y] {$Z$};
|
||||||
|
\node[latent] (w) [above right = of y] {$W$};
|
||||||
|
|
||||||
|
\node[latent] (P) [above = of z] {$P$};
|
||||||
|
\node[prior] (sigma2) [above = of P] {$\sigma^2$};
|
||||||
|
\node[prior] (rho) [above = of w] {$\rho_{1:R}$};
|
||||||
|
\node[prior] (alpha) [below = of y] {$\pmb{\alpha}$};
|
||||||
|
|
||||||
|
\path (z) edge (y);
|
||||||
|
\path (w) edge (y);
|
||||||
|
\path (rho) edge (w);
|
||||||
|
\path (alpha) edge (y);
|
||||||
|
\path (P) edge (z);
|
||||||
|
\path (sigma2) edge (P);
|
||||||
|
|
||||||
|
% moral
|
||||||
|
\path[moral] (z) edge (alpha);
|
||||||
|
\path[moral] (w) edge (alpha);
|
||||||
|
\path[moral] (z) edge (w);
|
||||||
|
|
||||||
|
\end{tikzpicture}
|
||||||
|
|
||||||
|
\end{document}
|
||||||
178
knowledge_base/projets-phylo.qmd
Normal file
|
|
@ -0,0 +1,178 @@
|
||||||
|
---
|
||||||
|
title: "Idées autour de l'inclusion de la phylogénie"
|
||||||
|
categories: [phylogénie, graphes, lbm, sbm]
|
||||||
|
---
|
||||||
|
{{< include /_macros.tex >}}
|
||||||
|
|
||||||
|
# Contexte de l'inclusion de la phylogénie dans l'estimation de la structure des interactions
|
||||||
|
|
||||||
|
Dans le 3e axe de ma thèse nous souhaitons inclure de l'information phylogénétique dans l'estimation de la structure des réseaux d'interaction microbiens.
|
||||||
|
|
||||||
|
1. Ces réseaux se présentent sous la forme de matrices de comptages hautement rectangulaires, c'est-à-dire avec un grand nombre de microorganismes et, en comparaison, peu d'échantillons (de sols, d'aliments, de patients...). Cette haute dimensionnalité met en échec les méthodes classiques non conçues pour gérer autant de noeuds (SBM). Il s'agit donc d'un **premier enjeu**.
|
||||||
|
|
||||||
|
2. Les données de comptages de ces matrices sont compositionnelles : la profondeur de séquençage (le nombre de séquences lues) étant finie, cela implique une dépendance entre les comptages observés. Si une séquence est surexprimée par rapport aux autres, alors que l'abondance réelle des autres n'a pas changé, les comptages observés des autres séquences vont diminuer. Voir [la note sur les données compositionnelles](#note-donnees-compo).
|
||||||
|
|
||||||
|
::: {#note-donnees-compo .callout-note title="Données compositionnelles"}
|
||||||
|
Soit $N$ la profondeur de séquençage, $m$ le nombre de séquences distinctes, $\forall s \in \{1,\dots,m\}, n_s$ le nombre réel de fois où la séquence $s$ est présente, et $t = \sum_s n_s$ le nombre total de séquences. Les comptages observés $o_s$ pour la séquence $s$ sont $o_s = \dfrac{n_s}{N}$, et on a $\sum_{s} o_s = \dfrac{1}{N} \sum_{s} n_s = \dfrac{t}{N}$ par construction.
|
||||||
|
|
||||||
|
Et donc, pour un $S$ quelconque, on a $o_S = \dfrac{t}{N} - \sum_{s, s\neq S} o_s$, ce qui impose une contrainte sur les $o_s$.
|
||||||
|
:::
|
||||||
|
|
||||||
|
Divers autres enjeux se posent quand on considère ce type de données. Par exemple, l'arbre phylogénétique peut ne pas être directement accessible, ou bien être dominé par un certain clade. Il peut aussi exploser en nombre d'individus à chaque niveau (à relier au point 1).
|
||||||
|
|
||||||
|
## Formalisme commun
|
||||||
|
|
||||||
|
Dans la suite, nous considèrerons $\mathcal{T}$ l'arbre ayant $L+1$ niveaux, indexés par $l = 0,\dots,L$, avec $0$ la racine commune et $L$ les feuilles de l'arbre.
|
||||||
|
|
||||||
|
$Y$ la matrice de bi-adjacence encodant le graphe et modélisant les interactions, de taille $n_1\times n_2$.
|
||||||
|
|
||||||
|
$V, X$ les matrices de covariable sur les noeuds en ligne et en colonnes de $Y$. $V$ est de taille $n_1 \times d$ et $X$ est de taille $n_2 \times p$
|
||||||
|
|
||||||
|
# SBM (ou LBM) Séquentiel
|
||||||
|
|
||||||
|
## Formalisation de l'idée
|
||||||
|
|
||||||
|
Ici on utilise l'arbre phylogénétique afin d'initialiser l'EM variationnel du niveau suivant.
|
||||||
|
|
||||||
|
Concrètement, on ajuste un LBM au niveau $l$, sur la matrice de comptage agrégée à ce niveau $Y^l$, ce qui donne des probabilités variationnelles $\pmb{\tau}^{1,l},\pmb{\tau}^{2,l}$ qui sont de tailles respectives $n_{1,l} \times Q_{l}$ et $n_{2,l} \times R_{l}$.
|
||||||
|
|
||||||
|
Puis, pour tout individu $i$ du niveau $l$ et tout $u\in \text{Child}(i)$, on initialise les probas de $u$ à partir de celles de son parent $i$ : $\widetilde{\tau}^{1,l+1}_u = \tau^{1,l}_i + \varepsilon_{u}$, avec $\varepsilon_u \sim \mathcal{N}_{Q_l}(0,\sigma^2)$ et on renormalise $\tau^{1,l+1}_{u} = \dfrac{\widetilde{\tau}^{1,l+1}_u}{\sum_q \widetilde{\tau}^{1,l+1}_{u,q}}$. On ajoute une perturbation afin de ne pas rester bloqué sur le point fixe précédent et de pouvoir donc obtenir les $\tau^{1,l+1}$ à l'issue de l'optimisation.
|
||||||
|
|
||||||
|
## Limites de l'approche
|
||||||
|
|
||||||
|
Le passage d'information selon l'arbre nous semble intuitivement être une bonne approche et les résultats que nous avons obtenus indiquent qu'un peu d'information semble passer, mais il faut aller profondément dans l'arbre et l'on rencontre alors le problème du coût computationnel.
|
||||||
|
En effet cette méthode ne diminue pas le coût en calcul puisqu'elle calcule un LBM à chacun des $L$ niveaux, au mieux elle donne un point d'initialisation intelligent mais cela semble difficilement applicable à des données réelles.
|
||||||
|
|
||||||
|
|
||||||
|
# SBM et LBM avec covariables sur les noeuds
|
||||||
|
|
||||||
|
Ce modèle vise à intégrer des covariables de noeuds comme modificateurs des probabilités *a priori* d'appartenance aux groupes.
|
||||||
|
Pour la phylogénie, en passant par une MDS ou une autre méthode permettant, à partir des distances phylogénétiques, d'obtenir des "positions" ou des covariables, cela permettrait d'injecter l'*a priori* phylogénétique dans l'estimation de la structure du réseau.
|
||||||
|
|
||||||
|
## Formalisation du modèle
|
||||||
|
|
||||||
|
Toujours modèle LBM mais avec probas d'appartenance pour les colonnes variables:
|
||||||
|
|
||||||
|
\begin{align*}
|
||||||
|
Z_i &\sim \mathcal{M}(1; \pi_1, \dots, \pi_Q), \sum_{q=1}^{Q} \pi_q = 1\\
|
||||||
|
W_j &\sim \mathcal{M}(1; \rho_1^j, \dots, \rho_R^j), \sum_{r=1}^{R} \rho_r^j = 1\\
|
||||||
|
Y_{i,j}&\mid Z_i = q, W_j = r \sim \mathcal{F}(\alpha_{qr})
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
Voici pour les probas pour les individus en colonne de la matrice d'adjacence :
|
||||||
|
\begin{align*}
|
||||||
|
\pmb{\beta}_{r}& = \begin{pmatrix}
|
||||||
|
\beta_{r,0}\\
|
||||||
|
\vdots\\
|
||||||
|
\beta_{r,p}
|
||||||
|
\end{pmatrix}, & X_{j,\bullet} = \begin{pmatrix}
|
||||||
|
1 = x_{0,j} & x_{1,j} & \dots & x_{p,j}
|
||||||
|
\end{pmatrix}\\
|
||||||
|
X_{j,\bullet} \pmb{\beta}_r& = \beta_{r,0} x_{0,j} + \beta_{r,1} x_{1,j} + \dots + \beta_{r,p} x_{p,j} & \approx \log(\rho_r^j) \\
|
||||||
|
B & = \begin{pmatrix}
|
||||||
|
\pmb{\beta}_1 \dots \pmb{\beta}_r \dots \pmb{\beta}_R
|
||||||
|
\end{pmatrix} & X_{j,\bullet}B \approx \log(\pmb{\rho}^j) \\
|
||||||
|
X B & \approx \log((\pmb{\rho}^j)_{j=1,\dots,n_2}) = \log(\pmb{\Rho})\\
|
||||||
|
\end{align*}
|
||||||
|
avec les $\beta, B$ qui désignent donc les coefficients de la combinaison linéaire et $X$ les covariables des individus (taille $n_2\times p$, $p$ covariables).
|
||||||
|
|
||||||
|
Et pour les probas en lignes du LBM :
|
||||||
|
\begin{align*}
|
||||||
|
\pmb{\gamma}_{q}& = \begin{pmatrix}
|
||||||
|
\gamma_{q,0}\\
|
||||||
|
\vdots\\
|
||||||
|
\gamma_{q,d}
|
||||||
|
\end{pmatrix}, & V_{i,\bullet} = \begin{pmatrix}
|
||||||
|
1 = v_{0,i} & v_{1,i} & \dots & v_{d,i}
|
||||||
|
\end{pmatrix}\\
|
||||||
|
V_{i,\bullet} \pmb{\gamma}_q & = \gamma_{q,0} v_{0,i} + \gamma_{q,1} v_{1,i} + \dots + \gamma_{q,d} v_{d,i} & \approx \log(\pi_q^i) \\
|
||||||
|
\Gamma & = \begin{pmatrix}
|
||||||
|
\pmb{\gamma}_1 \dots \pmb{\gamma}_q \dots \pmb{\gamma}_Q
|
||||||
|
\end{pmatrix} & V_{i,\bullet} \Gamma \approx \log(\pmb{\pi}^i) \\
|
||||||
|
V \Gamma & \approx \log((\pmb{\pi}^i)_{i=1,\dots,n_1}) = \log(\pmb{\Pi})
|
||||||
|
\end{align*}
|
||||||
|
avec les $\gamma, \Gamma$ qui désignent donc les coefficients de la combinaison linéaire et $V$ les covariables des individus (taille $n_1\times d$, $d$ covariables).
|
||||||
|
|
||||||
|
## Preuve de l'identifiabilité
|
||||||
|
Soient $B,B^{\prime}$ avec $B_{\bullet,R} = B^{\prime}_{\bullet,R} = \vec{0}_{p+1}$ et $X$ de rang plein tel que $X^{\top}X$ soit inversible.
|
||||||
|
|
||||||
|
\begin{align*}
|
||||||
|
&\sigma(XB) = \sigma(XB^{\prime})\\
|
||||||
|
&\implies \exists C = \begin{pmatrix}c_1 \\ \vdots \\ c_j \\ \vdots \\ c_{n_2}\end{pmatrix} \in \mathbb{R}^{n_2}, X B = X B^{\prime} + C \pmb{1}_{R}^{\top} \\
|
||||||
|
&\implies \exists C \in \mathbb{R}^{n_2}, (X B)_{j,r} = (X B^{\prime})_{j,r} + (C \pmb{1}_{R}^{\top})_{j,r} \\
|
||||||
|
&\implies \exists C \in \mathbb{R}^{n_2}, \forall r\in\{1\dots,R\}, \forall j\in\{1,\dots,n_2\}, \sum_{k=1}^{p+1} x_{j,k} \beta_{k,r} = \sum_{k=1}^{p+1} x_{j,k} \beta^{\prime}_{k,r} + c_j\\
|
||||||
|
&\implies \exists C \in \mathbb{R}^{n_2}, \forall j\in\{1,\dots,n_2\}, \sum_{k=1}^{p+1} x_{j,k} \beta_{k,R} = \sum_{k=1}^{p+1} x_{j,k} \beta^{\prime}_{k,R} + c_j \\
|
||||||
|
&\implies \exists C \in \mathbb{R}^{n_2}, \forall j\in\{1,\dots,n_2\}, \sum_{k=1}^{p+1} x_{j,k} \times 0 = \sum_{k=1}^{p+1} x_{j,k} \times 0 + c_j \\
|
||||||
|
&\implies \exists C \in \mathbb{R}^{n_2}, \forall j\in\{1,\dots,n_2\}, 0 = 0 + c_j \implies c_j = 0 \\
|
||||||
|
&\implies C = \begin{pmatrix} 0 \\ \vdots \\ 0 \end{pmatrix} \text{ et donc } XB = XB^{\prime} \\
|
||||||
|
& \implies (X^{\top} X)^{-1}X^{\top} X B = (X^{\top} X)^{-1}X^{\top} X B^{\prime} \implies B=B^{\prime}
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
## Inférence
|
||||||
|
|
||||||
|
Inférence variationnelle donc $\ell(Y;\pmb{\theta}) \geq \mathcal{J}(\mathcal{R},\pmb{\theta})$ avec
|
||||||
|
|
||||||
|
$$
|
||||||
|
\ELBORTheta = \sum_{i = 1}^{n_1}\sum_{j=1}^{n_2}\sum_{q \in \mathcal{Q}_1} \sum_{r \in \mathcal{Q}_2} \tau_{iq}^{1} \tau_{jr}^{2} \log f(Y_{ij}; \alpha_{qr})
|
||||||
|
+ \sum_{i=1}^{n_1} \sum_{q \in \mathcal{Q}_1} \tau_{iq}^{1} \log \pi_{q} + \sum_{j=1}^{n_2} \sum_{r \in \mathcal{Q}_2} \tau_{jr}^{2} \log \rho_{r}^{j} \\
|
||||||
|
- \sum_{i=1}^{n_1} \sum_{q \in \mathcal{Q}_1} \tau_{iq}^{1} \log \tau_{iq}^{1} - \sum_{j=1}^{n_2} \sum_{r \in \mathcal{Q}_2} \tau_{jr}^{2} \log \tau_{jr}^{2}
|
||||||
|
$$
|
||||||
|
|
||||||
|
Avec $\rho_r^j = \frac{\exp{\beta_r X_j}}{\sum_{s=1}^{R} \exp{\beta_s X_j}} = \sigma(\pmb{\beta} \pmb{X})_{r,j}$, où $\sigma$ désigne le softmax. Et sous la contrainte d'[identifiabilité](#preuve-de-lidentifiabilité) que l'un des $(\beta_r)_{r=1,\dots,R}$ soit nul, ici $\beta_R = 0$.
|
||||||
|
|
||||||
|
La partie pertinente de l'ELBO devient:
|
||||||
|
$$
|
||||||
|
P((\beta_r)_{r=1,\dots,R}, (X_j)_{j=1,\dots,n_2}, (\tau_{jr})_{\substack{j=1,\dots,n_2\\r=1,\dots,R}} ) = \sum_{j=1}^{n_2} \sum_{r=1}^{R} [\tau_{jr} (\beta_r X_j - \log (\sum_{s=1}^{R} \exp{\beta_s X_j}))]
|
||||||
|
$${#eq-modele-covar-prop}
|
||||||
|
|
||||||
|
Et on obtient la dérivée partielle par rapport à $\beta_t$ comme:
|
||||||
|
\begin{align*}
|
||||||
|
\dfrac{\partial P}{\partial \beta_t}&((\beta_r)_{r=1,\dots,R}, (X_j)_{j=1,\dots,n_2}, (\tau_{jr})_{\substack{j=1,\dots,n_2\\r=1,\dots,R}} ) = \sum_{j=1}^{n_2} \biggl[ \tau_{jt} X_j - \frac{X_j \exp{\beta_t X_j}}{\sum_{s=1}^{R} \exp{\beta_s X_j}} \biggr]\\
|
||||||
|
& = \sum_{j=1}^{n_2} \biggl[\bigl(\tau_{jt} - \sigma(\pmb{\beta} \pmb{X})_{t,j}\bigr) X_j\biggr] = \sum_{j=1}^{n_2} \biggl[\bigl(\tau_{jt} - \rho_t^j \bigr) X_j\biggr]
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
## Implémentation
|
||||||
|
|
||||||
|
J'ai implémenté tout ça dans un *fork* de [blockmodels](https://github.com/GrossSBM/blockmodels). Ce fork est disponible [ici](https://github.com/Polarolouis/blockmodels) et **en cours de relecture par JBL**.
|
||||||
|
|
||||||
|
Pour les détails techniques, j'ai ré-écrit la gestion des *memberships* en R pour passer les covariables et coefficients nécessaires aux calculs. J'ai implémenté une descente de gradient en utilisant un algorithme de type BFGS pour l'optimisation des coefficients de la combinaison linéaire. Et enfin j'ai intégré plusieurs choses dans le package R [sbm](https://github.com/GrossSBM/sbm):
|
||||||
|
|
||||||
|
1. [La gestion des covariables de noeuds](https://github.com/GrossSBM/sbm/tree/nodescovariates)
|
||||||
|
2. Le support des [valeurs manquantes](https://github.com/GrossSBM/sbm/tree/feat/NAsupport)
|
||||||
|
|
||||||
|
## La suite
|
||||||
|
|
||||||
|
Maintenant, Sophie et Pierre gèrent la rédaction de vignettes et de simulations autour de ces fonctionnalités.
|
||||||
|
|
||||||
|
Nous attendons de voir si l'on trouve un jeu de données adapté à cette méthode.
|
||||||
|
|
||||||
|
**Limites** : Ce modèle ne permet pas le passage à l'échelle pour les gros réseaux que représentent les matrices de comptage.
|
||||||
|
|
||||||
|
# LBM avec dépendance latente entre les probabilités *a priori*
|
||||||
|
|
||||||
|
## Formalisation du modèle
|
||||||
|
|
||||||
|
Pierre a proposé que l'on pose une structure latente sur les $\pmb{Z}$. C'est à dire
|
||||||
|
\begin{align*}
|
||||||
|
& P \sim \Normal_{n_1, K-1} (O_{n_1, K-1}, \Sigma, \sigma^2 Id_{K-1}), \\
|
||||||
|
\forall i \in \{1,\dots,n_1\}, & Z_i \mid P_i \overset{ind}{\sim} \Cat_{K} ({\ilr}^{-1}(P_i) = \pi_{1:K}^{(i)}), \\
|
||||||
|
\forall j \in \{1,\dots,n_2\}, & W_j \overset{iid}{\sim} \Cat_R (\rho_{1:R}),\\
|
||||||
|
\forall (i,j) \in \{1,\dots,n_1\}\times\{1,\dots,n_2\}, & Y_{ij} \mid Z_i = k, W_j = r \overset{ind}{\sim} \mathcal{F}(\alpha_{kr}),
|
||||||
|
\end{align*}
|
||||||
|
avec $\Sigma$, la matrice de variance-covariance déterminée en fonction de l'apparentement (phylogénétique) des noeuds.
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
# Échantillonnage selon l'arbre
|
||||||
|
|
||||||
|
Afin d'affronter le coût computationnel que représente l'ajustement
|
||||||
|
|
||||||
|
# *Latent Position Model* (LPM) avec phylogénie des représentations latentes selon la phylogénie
|
||||||
|
|
||||||
|
## Classique
|
||||||
|
|
||||||
|
## *Deep* LPM
|
||||||
|
|
||||||
|
Possibilité d'utiliser un encodeur qui soit un réseau de neurones et de bénéficier de tous les décodeurs de la littérature LPM.
|
||||||
|
**Quel est le lien avec le VGAE ?**
|
||||||
|
**Avantage de passage à l'échelle??**
|
||||||
39
knowledge_base/vae_wasserstein_gromov.qmd
Normal file
|
|
@ -0,0 +1,39 @@
|
||||||
|
---
|
||||||
|
title: "Variational Graph AutoEncoder with Wasserstein"
|
||||||
|
categories: [convolution, machine learning, vae, graphes]
|
||||||
|
---
|
||||||
|
{{< include /_macros.tex >}}
|
||||||
|
|
||||||
|
Suite à la discussion avec Julian j'inscris ce que l'on s'est dit.
|
||||||
|
|
||||||
|
# Idée principale
|
||||||
|
|
||||||
|
Les VAE avec convolution de graphes (GCN) permettent d'apprendre une représentation latente des noeuds d'un graphe basée sur les interactions entre noeuds.
|
||||||
|
|
||||||
|
**Objectif** : apprendre un même encodeur et donc un espace latent structuré pour clusteriser une collection de réseaux sur la base de la structure.
|
||||||
|
*Sous-objectif* : pouvoir prendre en compte des covariables (Fused Wasserstein ?).
|
||||||
|
|
||||||
|
Principe du VAE:
|
||||||
|
|
||||||
|
Soit $Y$ une matrice d'adjacence (ou de bi-adjacence pour les graphes bipartites), $X$ une matrice de covariables.
|
||||||
|
|
||||||
|
Soit $D_1$ la matrice des degrés en ligne, $D_2$ la matrice des degrés en colonne.
|
||||||
|
|
||||||
|
$\widetilde{Y} = D_1^{-1/2} Y D_2^{-1/2}$
|
||||||
|
|
||||||
|
**à compléter**
|
||||||
|
|
||||||
|
# Apprentissage contrastif
|
||||||
|
|
||||||
|
Puisque l'on voudrait marquer la séparation entre différentes structures de réseaux, on pourrait vouloir faire de l'[apprentissage contrastif pour V(G)AE](https://u9534056.medium.com/an-overview-of-contrastive-learning-fa520f5f2c23).
|
||||||
|
|
||||||
|
## Hypersphère méga cool
|
||||||
|
|
||||||
|
Il faut creuser : contraindre les *embeddings* à vivre sur la surface d'une hypersphère car, d'après Julian et la littérature, par rapport à un espace euclidien cela permet d'avoir :
|
||||||
|
|
||||||
|
- position latente bornée : stabilisation de l'apprentissage et évite l'explosion dans une ou plusieurs directions.
|
||||||
|
- couverture "uniforme" de la sphère : tendance à faciliter l'apprentissage contrastif, avec l'idée de bien séparer les graphes aux structures différentes.
|
||||||
|
|
||||||
|
[Première source](https://www.envisioning.com/vocab/hyperspherical-representation-learning)
|
||||||
|
|
||||||
|
Le softmax est remplacé par la loi de von Mises-Fisher. D'après [Wikipédia](https://fr.wikipedia.org/wiki/Loi_de_von_Mises-Fisher#Relation_avec_la_loi_normale), c'est l'équivalent de la loi normale multivariée à covariance isotrope restreinte à l'hypersphère unité.
|
||||||
|
|
@ -2,9 +2,8 @@
|
||||||
title: "Bilan semaine 17 2025 : 24 avril - 25 avril"
|
title: "Bilan semaine 17 2025 : 24 avril - 25 avril"
|
||||||
categories:
|
categories:
|
||||||
- colBiSBM
|
- colBiSBM
|
||||||
format:
|
|
||||||
html:
|
date: 25 04 2025
|
||||||
embed-resources: true
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## A faire
|
## A faire
|
||||||
|
|
|
||||||
95
suivi/2025-18/2025-18.qmd
Normal file
|
|
@ -0,0 +1,95 @@
|
||||||
|
---
|
||||||
|
title: "Bilan semaine 18 2025 : 28 avril - 2 mai"
|
||||||
|
categories: [colBiSBM, inférence]
|
||||||
|
date: 2025 05 02
|
||||||
|
---
|
||||||
|
|
||||||
|
## A faire
|
||||||
|
|
||||||
|
### Stratégie suite : Inférence
|
||||||
|
|
||||||
|
- Lire biblio fournie Julie, Inférence de réseaux : co-occurrence
|
||||||
|
|
||||||
|
- Papier pour comprendre données
|
||||||
|
|
||||||
|
- pbs : variance, bcp de zero, covariables, offset et taxonomie (Reseaux arretes differents niveaux : Genre, OTU ...)
|
||||||
|
|
||||||
|
> Combine networks at different taxonomic levels
|
||||||
|
|
||||||
|
- Inférence + GREMLINS
|
||||||
|
|
||||||
|
### Rédaction article
|
||||||
|
|
||||||
|
- Relire intro St Clair
|
||||||
|
- S'inspirer structure pour mon intro
|
||||||
|
- Trouver biblio intro
|
||||||
|
- Rédiger l'intro
|
||||||
|
- Regarder les applications pour les collections de réseaux recommender system
|
||||||
|
- Lire les papiers de Baldock Traveset Souza Cordeniz Trojelsgaard et Gibson
|
||||||
|
|
||||||
|
- Dire résultats nettement meilleurs et variabilités inférieures.
|
||||||
|
|
||||||
|
- Intégrer les retours de Sophie
|
||||||
|
|
||||||
|
### Simulations article
|
||||||
|
|
||||||
|
- Comparer sur clustering unipartite avec les versions symétrisées par blocs des matrices d'adjacence.
|
||||||
|
|
||||||
|
- Corriger structure de simus :
|
||||||
|
- Pour noisy $\alpha$ :
|
||||||
|
- Logit pour envoyer la gaussienne vers (0,1)
|
||||||
|
- Beta contrainte dans (0,1)
|
||||||
|
- Pour noisy links : Générer `nb_clustering` collections de taille M puis prélever $\epsilon_{max}n_r n_c$ liens à inverser puis pour les $\epsilon < \epsilon_{max}$ prélever dans la liste des indices afin d'avoir des perturbations emboitées.
|
||||||
|
|
||||||
|
### Applications
|
||||||
|
|
||||||
|
- Kmeans sur la densité des réseaux subdoré pour pré-partitionner et *clusteriser*.
|
||||||
|
Car densités déséquilibrées.
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
### Autour de l'article et du package
|
||||||
|
|
||||||
|
- Créer des vignettes illustrant par exemple des cas de simulations. **Possible de mettre l'exemple d'application de Sophie sur les réseaux avec gradient d'urbanisation**.
|
||||||
|
|
||||||
|
## J'ai fait
|
||||||
|
|
||||||
|
### JdS
|
||||||
|
|
||||||
|
- colDEM CSE
|
||||||
|
|
||||||
|
|
||||||
|
### Clustering exhaustif Baldock
|
||||||
|
|
||||||
|
- Le clustering de toutes les 52 partitions s'est fait en 5h30 ! (Mémoïsation)
|
||||||
|
|
||||||
|
- Pour iid la meilleure partition avec $BICL=-9466.911$ contre $BICL_{algo} = -9466.873 \pm 0.02205$ trouvé avec l'algo
|
||||||
|

|
||||||
|
|
||||||
|
- Pour $\pi\rho$ la meilleure partition avec $BICL = -9497.92$ contre $BICL_{algo} =-9497.92 \pm 0.00009$
|
||||||
|

|
||||||
|
|
||||||
|
## A continuer
|
||||||
|
|
||||||
|
- Résultats simus NA **Erreur pour certaines conditions** : Pour NA robustness générer `nb_rep` collections de taille $M=2$ et prélever
|
||||||
|
$\epsilon_{max}n_r n_c$ liens à retirer puis pour les $\epsilon < \epsilon_{max}$ prélever dans la liste des indices afin d'avoir des perturbations emboitées.
|
||||||
|
Il faut que j'ajoute un mécanisme pour reprendre des conditions qui ont planté et que je skip dans le future_lapply les conditions déjà traitées (pour avoir la même seed quand je vais exécuter le code).
|
||||||
|
Implémenté les missing steps *en attente des résultats MIGALE*.
|
||||||
|
|
||||||
|
- Lire Biological Networks - François Képès
|
||||||
|
|
||||||
|
- Relancer simus d'inférence avec n = 240 pour voir si la qualité augmente (se rassurer). En fait on est déjà à 240, j'ai relancé avec M = 4 au lieu de M = 2.
|
||||||
|
En attente résultats MIGALE
|
||||||
|
|
||||||
|
### Applications
|
||||||
|
|
||||||
|
- Idée Sophie: Regarder clustering de données plantes-pollinisateur selon gradient d'urbanisation
|
||||||
|
|
||||||
|
> Sophie a fait une appli qui marche bien et va dans le sens de l'analyse faite
|
||||||
|
(à savoir pas d'effet du gradient d'urbanisation). À continuer pour l'intégrer dans l'article !
|
||||||
|
|
||||||
|
### Simulations article
|
||||||
|
|
||||||
|
- Relancer simulations de clustering avec $M = 30$ où $M_i = 10, \forall i$. En attente retour MIGALE
|
||||||
|
Relancer simus clustering avec VEM steps = 10 000 et plus nombreux init pour spectral. Ajouter simu clustering métriques nb sous-collections obtenues.
|
||||||
|
Vérifier les résultats obtenus si ARI = 0. Et augmenter la taille $M = 30$ avec $M_1 = M_2 = M_3 = 10$.
|
||||||
BIN
suivi/2025-18/figs/density-subdore.png
Normal file
|
After Width: | Height: | Size: 55 KiB |
403
suivi/2025-18/figs/partition-iid.svg
Normal file
|
|
@ -0,0 +1,403 @@
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" class="httpgd" width="1367.00" height="979.00" viewBox="0 0 1093.60 783.20">
|
||||||
|
<defs>
|
||||||
|
<style type='text/css'><![CDATA[
|
||||||
|
.httpgd line, .httpgd polyline, .httpgd polygon, .httpgd path, .httpgd rect, .httpgd circle {
|
||||||
|
fill: none;
|
||||||
|
stroke: #000000;
|
||||||
|
stroke-linecap: round;
|
||||||
|
stroke-linejoin: round;
|
||||||
|
stroke-miterlimit: 10.00;
|
||||||
|
}
|
||||||
|
]]></style>
|
||||||
|
<clipPath id="c0"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
|
||||||
|
<clipPath id="c1"><rect x="46.58" y="29.02" width="941.56" height="618.33"/></clipPath>
|
||||||
|
<clipPath id="c2"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
|
||||||
|
<clipPath id="c3"><rect x="46.58" y="29.02" width="941.56" height="222.24"/></clipPath>
|
||||||
|
<clipPath id="c4"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
|
||||||
|
<clipPath id="c5"><rect x="46.58" y="29.02" width="941.56" height="222.24"/></clipPath>
|
||||||
|
<clipPath id="c6"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
|
||||||
|
<clipPath id="c7"><rect x="46.58" y="29.02" width="941.56" height="222.24"/></clipPath>
|
||||||
|
<clipPath id="c8"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
|
||||||
|
<clipPath id="c9"><rect x="46.58" y="29.02" width="941.56" height="222.24"/></clipPath>
|
||||||
|
<clipPath id="c10"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
|
||||||
|
<clipPath id="c11"><rect x="307.96" y="134.16" width="780.16" height="285.47"/></clipPath>
|
||||||
|
<clipPath id="c12"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
|
||||||
|
<clipPath id="c13"><rect x="307.96" y="23.54" width="780.16" height="110.62"/></clipPath>
|
||||||
|
<clipPath id="c14"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
|
||||||
|
<clipPath id="c15"><rect x="5.48" y="134.16" width="302.48" height="285.47"/></clipPath>
|
||||||
|
<clipPath id="c16"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
|
||||||
|
<clipPath id="c17"><rect x="677.42" y="141.63" width="109.63" height="109.63"/></clipPath>
|
||||||
|
<clipPath id="c18"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
|
||||||
|
<clipPath id="c19"><rect x="659.15" y="29.02" width="146.17" height="73.09"/></clipPath>
|
||||||
|
<clipPath id="c20"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
|
||||||
|
<clipPath id="c21"><rect x="64.90" y="141.63" width="219.26" height="109.63"/></clipPath>
|
||||||
|
<clipPath id="c22"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
|
||||||
|
<clipPath id="c23"><rect x="46.58" y="425.11" width="941.56" height="222.24"/></clipPath>
|
||||||
|
<clipPath id="c24"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
|
||||||
|
<clipPath id="c25"><rect x="46.58" y="425.11" width="941.56" height="222.24"/></clipPath>
|
||||||
|
<clipPath id="c26"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
|
||||||
|
<clipPath id="c27"><rect x="46.58" y="425.11" width="941.56" height="222.24"/></clipPath>
|
||||||
|
<clipPath id="c28"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
|
||||||
|
<clipPath id="c29"><rect x="46.58" y="425.11" width="941.56" height="222.24"/></clipPath>
|
||||||
|
<clipPath id="c30"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
|
||||||
|
<clipPath id="c31"><rect x="320.63" y="530.25" width="767.49" height="247.47"/></clipPath>
|
||||||
|
<clipPath id="c32"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
|
||||||
|
<clipPath id="c33"><rect x="320.63" y="419.63" width="767.49" height="110.62"/></clipPath>
|
||||||
|
<clipPath id="c34"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
|
||||||
|
<clipPath id="c35"><rect x="5.48" y="530.25" width="315.15" height="247.47"/></clipPath>
|
||||||
|
<clipPath id="c36"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
|
||||||
|
<clipPath id="c37"><rect x="628.21" y="537.72" width="182.72" height="109.63"/></clipPath>
|
||||||
|
<clipPath id="c38"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
|
||||||
|
<clipPath id="c39"><rect x="536.85" y="425.11" width="365.44" height="73.09"/></clipPath>
|
||||||
|
<clipPath id="c40"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
|
||||||
|
<clipPath id="c41"><rect x="46.58" y="547.78" width="268.57" height="89.52"/></clipPath>
|
||||||
|
<clipPath id="c42"><rect x="0.00" y="0.00" width="1093.60" height="783.20"/></clipPath>
|
||||||
|
</defs>
|
||||||
|
<rect width="100%" height="100%" style="stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<g clip-path="url(#c0)">
|
||||||
|
<rect x="0.00" y="0.00" width="1093.60" height="783.20" style="stroke-width: 1.07;stroke: #FFFFFF;fill: #FFFFFF;"/>
|
||||||
|
</g><g clip-path="url(#c11)">
|
||||||
|
<rect x="307.96" y="134.16" width="780.16" height="285.47" style="stroke-width: 2.13;stroke: #FFFFFF;fill: #FFFFFF;"/>
|
||||||
|
</g><g clip-path="url(#c13)">
|
||||||
|
<rect x="307.96" y="23.54" width="780.16" height="110.62" style="stroke-width: 1.07;stroke: #FFFFFF;fill: #FFFFFF;"/>
|
||||||
|
</g><g clip-path="url(#c15)">
|
||||||
|
<rect x="5.48" y="134.16" width="302.48" height="285.47" style="stroke-width: 1.07;stroke: #FFFFFF;fill: #FFFFFF;"/>
|
||||||
|
</g><g clip-path="url(#c17)">
|
||||||
|
<rect x="677.42" y="141.63" width="109.63" height="109.63" style="stroke-width: 2.13;stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<polyline points="677.42,141.63 787.05,141.63" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="677.42,196.45 787.05,196.45" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="677.42,251.26 787.05,251.26" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="677.42,251.26 677.42,141.63" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="732.24,251.26 732.24,141.63" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="787.05,251.26 787.05,141.63" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="677.42,169.04 787.05,169.04" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="677.42,223.86 787.05,223.86" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="704.83,251.26 704.83,141.63" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="759.65,251.26 759.65,141.63" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<rect x="677.42" y="141.63" width="54.82" height="54.82" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FF9E81;"/>
|
||||||
|
<rect x="732.24" y="141.63" width="54.82" height="54.82" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFCBB9;"/>
|
||||||
|
<rect x="677.42" y="196.45" width="54.82" height="54.82" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFCEBD;"/>
|
||||||
|
<rect x="732.24" y="196.45" width="54.82" height="54.82" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFFAF9;"/>
|
||||||
|
<line x1="677.42" y1="196.45" x2="787.05" y2="196.45" style="stroke-width: 1.07;stroke-linecap: butt;"/>
|
||||||
|
<line x1="677.42" y1="251.26" x2="787.05" y2="251.26" style="stroke-width: 1.07;stroke-linecap: butt;"/>
|
||||||
|
<line x1="732.24" y1="251.26" x2="732.24" y2="141.63" style="stroke-width: 1.07;stroke-linecap: butt;"/>
|
||||||
|
<line x1="787.05" y1="251.26" x2="787.05" y2="141.63" style="stroke-width: 1.07;stroke-linecap: butt;"/>
|
||||||
|
<g><text x="704.83" y="172.98" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="15.57px" lengthAdjust="spacingAndGlyphs">0.5</text></g>
|
||||||
|
<g><text x="759.65" y="172.98" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.27</text></g>
|
||||||
|
<g><text x="704.83" y="227.80" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.26</text></g>
|
||||||
|
<g><text x="759.65" y="227.80" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.02</text></g>
|
||||||
|
<rect x="677.42" y="141.63" width="109.63" height="109.63" style="stroke-width: 2.13;stroke: #333333;"/>
|
||||||
|
</g><g clip-path="url(#c18)">
|
||||||
|
<g><text x="670.70" y="173.33" text-anchor="end" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">1</text></g>
|
||||||
|
<g><text x="670.70" y="228.14" text-anchor="end" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">2</text></g>
|
||||||
|
<polyline points="673.69,169.04 677.42,169.04" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="673.69,223.86 677.42,223.86" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="704.83,255.00 704.83,251.26" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="759.65,255.00 759.65,251.26" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<g><text x="704.83" y="266.56" text-anchor="middle" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">1</text></g>
|
||||||
|
<g><text x="759.65" y="266.56" text-anchor="middle" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">2</text></g>
|
||||||
|
</g><g clip-path="url(#c19)">
|
||||||
|
<rect x="659.15" y="29.02" width="146.17" height="73.09" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<rect x="665.79" y="38.16" width="6.04" height="54.82" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FC8D62;"/>
|
||||||
|
<rect x="671.84" y="38.16" width="126.85" height="54.82" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #66C2A5;"/>
|
||||||
|
<g><text x="735.26" y="69.51" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.95</text></g>
|
||||||
|
</g><g clip-path="url(#c20)">
|
||||||
|
<polyline points="659.15,102.11 659.15,29.02" style="stroke-width: 1.07;stroke-linecap: butt;"/>
|
||||||
|
<g><text x="654.22" y="68.71" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="137.92px" lengthAdjust="spacingAndGlyphs">Baldock2011_TB+Baldock2011_JN</text></g>
|
||||||
|
<polyline points="656.41,65.57 659.15,65.57" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="659.15,102.11 805.32,102.11" style="stroke-width: 1.07;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="798.68,104.85 798.68,102.11" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="765.46,104.85 765.46,102.11" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="732.24,104.85 732.24,102.11" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="699.02,104.85 699.02,102.11" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="665.79,104.85 665.79,102.11" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<g><text x="798.68" y="113.33" text-anchor="middle" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.00</text></g>
|
||||||
|
<g><text x="765.46" y="113.33" text-anchor="middle" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.25</text></g>
|
||||||
|
<g><text x="732.24" y="113.33" text-anchor="middle" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.50</text></g>
|
||||||
|
<g><text x="699.02" y="113.33" text-anchor="middle" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.75</text></g>
|
||||||
|
<g><text x="665.79" y="113.33" text-anchor="middle" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">1.00</text></g>
|
||||||
|
<g><text x="732.24" y="126.04" text-anchor="middle" style="font-family: Arimo;font-size: 11.00px;" textLength="104.64px" lengthAdjust="spacingAndGlyphs">Column proportions</text></g>
|
||||||
|
</g><g clip-path="url(#c21)">
|
||||||
|
<rect x="64.90" y="141.63" width="219.26" height="109.63" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<rect x="64.90" y="141.63" width="219.26" height="2.45" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #1F78B4;"/>
|
||||||
|
<rect x="64.90" y="144.08" width="219.26" height="107.18" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6CEE3;"/>
|
||||||
|
<g><text x="174.53" y="201.61" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.98</text></g>
|
||||||
|
</g><g clip-path="url(#c22)">
|
||||||
|
<polyline points="64.90,251.26 64.90,141.63" style="stroke-width: 1.07;stroke-linecap: butt;"/>
|
||||||
|
<g><text x="59.97" y="254.41" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.00</text></g>
|
||||||
|
<g><text x="59.97" y="227.00" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.25</text></g>
|
||||||
|
<g><text x="59.97" y="199.59" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.50</text></g>
|
||||||
|
<g><text x="59.97" y="172.18" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.75</text></g>
|
||||||
|
<g><text x="59.97" y="144.78" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">1.00</text></g>
|
||||||
|
<polyline points="62.16,251.26 64.90,251.26" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="62.16,223.86 64.90,223.86" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="62.16,196.45 64.90,196.45" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="62.16,169.04 64.90,169.04" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="62.16,141.63 64.90,141.63" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<g><text transform="translate(37.14,196.45) rotate(-90.00)" text-anchor="middle" style="font-family: Arimo;font-size: 11.00px;" textLength="86.46px" lengthAdjust="spacingAndGlyphs">Row proportions</text></g>
|
||||||
|
<polyline points="64.90,251.26 284.16,251.26" style="stroke-width: 1.07;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="174.53,254.00 174.53,251.26" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<g><text transform="translate(177.68,256.20) rotate(-90.00)" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="137.92px" lengthAdjust="spacingAndGlyphs">Baldock2011_TB+Baldock2011_JN</text></g>
|
||||||
|
<rect x="999.10" y="6.12" width="63.50" height="123.13" style="stroke-width: 2.13;stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<g><text x="1006.57" y="26.11" style="font-family: Arimo;font-size: 15.00px;" textLength="39.20px" lengthAdjust="spacingAndGlyphs">alpha</text></g>
|
||||||
|
<g><image x="1006.57" y="35.38" width="17.28" height="86.40" preserveAspectRatio="none" xlink:href="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAEsCAYAAAACUNnVAAAAeUlEQVQ4jcWPSw6AMAhEH2O9/5F1Y2pSARtL4oYA82HggEPsxlWaEM0QmyEkp+uj3WMvFnUTo7vjSRkVOQUEEZDLzBytJ3u/EbrMAHXkYYfzUbxLXyVV1LhMp59UjGhotRrthxhFpim69GVN8OjGR5c4ZKFLYeaEdwL6pgZTBMOKRwAAAABJRU5ErkJggg=="/></g>
|
||||||
|
<polyline points="1020.40,121.64 1023.85,121.64" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="1020.40,100.11 1023.85,100.11" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="1020.40,78.58 1023.85,78.58" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="1020.40,57.05 1023.85,57.05" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="1020.40,35.53 1023.85,35.53" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="1010.03,121.64 1006.57,121.64" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="1010.03,100.11 1006.57,100.11" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="1010.03,78.58 1006.57,78.58" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="1010.03,57.05 1006.57,57.05" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="1010.03,35.53 1006.57,35.53" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
|
||||||
|
<g><text x="1031.32" y="125.92" style="font-family: Arimo;font-size: 12.00px;" textLength="23.80px" lengthAdjust="spacingAndGlyphs">0.00</text></g>
|
||||||
|
<g><text x="1031.32" y="104.39" style="font-family: Arimo;font-size: 12.00px;" textLength="23.80px" lengthAdjust="spacingAndGlyphs">0.25</text></g>
|
||||||
|
<g><text x="1031.32" y="82.87" style="font-family: Arimo;font-size: 12.00px;" textLength="23.80px" lengthAdjust="spacingAndGlyphs">0.50</text></g>
|
||||||
|
<g><text x="1031.32" y="61.34" style="font-family: Arimo;font-size: 12.00px;" textLength="23.80px" lengthAdjust="spacingAndGlyphs">0.75</text></g>
|
||||||
|
<g><text x="1031.32" y="39.81" style="font-family: Arimo;font-size: 12.00px;" textLength="23.80px" lengthAdjust="spacingAndGlyphs">1.00</text></g>
|
||||||
|
<rect x="999.10" y="140.21" width="81.55" height="61.50" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<g><text x="1004.58" y="154.87" style="font-family: Arimo;font-size: 11.00px;" textLength="70.59px" lengthAdjust="spacingAndGlyphs">Column block</text></g>
|
||||||
|
<rect x="1004.58" y="161.67" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<rect x="1005.29" y="162.38" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FC8D62;"/>
|
||||||
|
<rect x="1004.58" y="178.95" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<rect x="1005.29" y="179.66" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #66C2A5;"/>
|
||||||
|
<g><text x="1027.34" y="173.45" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">1</text></g>
|
||||||
|
<g><text x="1027.34" y="190.73" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">2</text></g>
|
||||||
|
<rect x="999.10" y="212.67" width="63.37" height="61.50" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<g><text x="1004.58" y="227.32" style="font-family: Arimo;font-size: 11.00px;" textLength="52.41px" lengthAdjust="spacingAndGlyphs">Row block</text></g>
|
||||||
|
<rect x="1004.58" y="234.12" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<rect x="1005.29" y="234.83" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #1F78B4;"/>
|
||||||
|
<rect x="1004.58" y="251.40" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<rect x="1005.29" y="252.11" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6CEE3;"/>
|
||||||
|
<g><text x="1027.34" y="245.91" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">1</text></g>
|
||||||
|
<g><text x="1027.34" y="263.19" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">2</text></g>
|
||||||
|
</g><g clip-path="url(#c31)">
|
||||||
|
<rect x="320.63" y="530.25" width="767.49" height="247.47" style="stroke-width: 2.13;stroke: #FFFFFF;fill: #FFFFFF;"/>
|
||||||
|
</g><g clip-path="url(#c33)">
|
||||||
|
<rect x="320.63" y="419.63" width="767.49" height="110.62" style="stroke-width: 1.07;stroke: #FFFFFF;fill: #FFFFFF;"/>
|
||||||
|
</g><g clip-path="url(#c35)">
|
||||||
|
<rect x="5.48" y="530.25" width="315.15" height="247.47" style="stroke-width: 1.07;stroke: #FFFFFF;fill: #FFFFFF;"/>
|
||||||
|
</g><g clip-path="url(#c37)">
|
||||||
|
<rect x="628.21" y="537.72" width="182.72" height="109.63" style="stroke-width: 2.13;stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<polyline points="628.21,537.72 810.93,537.72" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="628.21,574.27 810.93,574.27" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="628.21,610.81 810.93,610.81" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="628.21,647.35 810.93,647.35" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="628.21,647.35 628.21,537.72" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="664.76,647.35 664.76,537.72" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="701.30,647.35 701.30,537.72" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="737.84,647.35 737.84,537.72" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="774.39,647.35 774.39,537.72" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="810.93,647.35 810.93,537.72" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="628.21,555.99 810.93,555.99" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="628.21,592.54 810.93,592.54" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="628.21,629.08 810.93,629.08" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="646.48,647.35 646.48,537.72" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="683.03,647.35 683.03,537.72" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="719.57,647.35 719.57,537.72" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="756.11,647.35 756.11,537.72" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="792.66,647.35 792.66,537.72" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<rect x="628.21" y="537.72" width="36.54" height="36.54" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFBCA7;"/>
|
||||||
|
<rect x="664.76" y="537.72" width="36.54" height="36.54" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFB29A;"/>
|
||||||
|
<rect x="701.30" y="537.72" width="36.54" height="36.54" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFBBA5;"/>
|
||||||
|
<rect x="737.84" y="537.72" width="36.54" height="36.54" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFF6F2;"/>
|
||||||
|
<rect x="774.39" y="537.72" width="36.54" height="36.54" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFD3C4;"/>
|
||||||
|
<rect x="628.21" y="574.27" width="36.54" height="36.54" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FF9B7E;"/>
|
||||||
|
<rect x="664.76" y="574.27" width="36.54" height="36.54" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFC9B6;"/>
|
||||||
|
<rect x="701.30" y="574.27" width="36.54" height="36.54" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFDFD3;"/>
|
||||||
|
<rect x="737.84" y="574.27" width="36.54" height="36.54" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFF8F5;"/>
|
||||||
|
<rect x="774.39" y="574.27" width="36.54" height="36.54" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFFAF8;"/>
|
||||||
|
<rect x="628.21" y="610.81" width="36.54" height="36.54" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFDBCF;"/>
|
||||||
|
<rect x="664.76" y="610.81" width="36.54" height="36.54" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFF2ED;"/>
|
||||||
|
<rect x="701.30" y="610.81" width="36.54" height="36.54" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFFCFB;"/>
|
||||||
|
<rect x="737.84" y="610.81" width="36.54" height="36.54" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFFEFD;"/>
|
||||||
|
<rect x="774.39" y="610.81" width="36.54" height="36.54" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFFFFE;"/>
|
||||||
|
<line x1="628.21" y1="574.27" x2="810.93" y2="574.27" style="stroke-width: 1.07;stroke-linecap: butt;"/>
|
||||||
|
<line x1="628.21" y1="610.81" x2="810.93" y2="610.81" style="stroke-width: 1.07;stroke-linecap: butt;"/>
|
||||||
|
<line x1="628.21" y1="647.35" x2="810.93" y2="647.35" style="stroke-width: 1.07;stroke-linecap: butt;"/>
|
||||||
|
<line x1="664.76" y1="647.35" x2="664.76" y2="537.72" style="stroke-width: 1.07;stroke-linecap: butt;"/>
|
||||||
|
<line x1="701.30" y1="647.35" x2="701.30" y2="537.72" style="stroke-width: 1.07;stroke-linecap: butt;"/>
|
||||||
|
<line x1="737.84" y1="647.35" x2="737.84" y2="537.72" style="stroke-width: 1.07;stroke-linecap: butt;"/>
|
||||||
|
<line x1="774.39" y1="647.35" x2="774.39" y2="537.72" style="stroke-width: 1.07;stroke-linecap: butt;"/>
|
||||||
|
<line x1="810.93" y1="647.35" x2="810.93" y2="537.72" style="stroke-width: 1.07;stroke-linecap: butt;"/>
|
||||||
|
<g><text x="646.48" y="559.93" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.35</text></g>
|
||||||
|
<g><text x="683.03" y="559.93" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="15.57px" lengthAdjust="spacingAndGlyphs">0.4</text></g>
|
||||||
|
<g><text x="719.57" y="559.93" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.35</text></g>
|
||||||
|
<g><text x="756.11" y="559.93" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.05</text></g>
|
||||||
|
<g><text x="792.66" y="559.93" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.23</text></g>
|
||||||
|
<g><text x="646.48" y="596.48" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.51</text></g>
|
||||||
|
<g><text x="683.03" y="596.48" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.28</text></g>
|
||||||
|
<g><text x="719.57" y="596.48" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.17</text></g>
|
||||||
|
<g><text x="756.11" y="596.48" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.04</text></g>
|
||||||
|
<g><text x="792.66" y="596.48" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.03</text></g>
|
||||||
|
<g><text x="646.48" y="633.02" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.19</text></g>
|
||||||
|
<g><text x="683.03" y="633.02" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.07</text></g>
|
||||||
|
<g><text x="719.57" y="633.02" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.02</text></g>
|
||||||
|
<g><text x="756.11" y="633.02" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.01</text></g>
|
||||||
|
<g><text x="792.66" y="633.02" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="6.31px" lengthAdjust="spacingAndGlyphs">0</text></g>
|
||||||
|
<rect x="628.21" y="537.72" width="182.72" height="109.63" style="stroke-width: 2.13;stroke: #333333;"/>
|
||||||
|
</g><g clip-path="url(#c38)">
|
||||||
|
<g><text x="621.49" y="560.28" text-anchor="end" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">1</text></g>
|
||||||
|
<g><text x="621.49" y="596.82" text-anchor="end" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">2</text></g>
|
||||||
|
<g><text x="621.49" y="633.37" text-anchor="end" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">3</text></g>
|
||||||
|
<polyline points="624.48,555.99 628.21,555.99" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="624.48,592.54 628.21,592.54" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="624.48,629.08 628.21,629.08" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="646.48,651.09 646.48,647.35" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="683.03,651.09 683.03,647.35" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="719.57,651.09 719.57,647.35" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="756.11,651.09 756.11,647.35" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="792.66,651.09 792.66,647.35" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<g><text x="646.48" y="662.65" text-anchor="middle" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">1</text></g>
|
||||||
|
<g><text x="683.03" y="662.65" text-anchor="middle" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">2</text></g>
|
||||||
|
<g><text x="719.57" y="662.65" text-anchor="middle" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">3</text></g>
|
||||||
|
<g><text x="756.11" y="662.65" text-anchor="middle" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">4</text></g>
|
||||||
|
<g><text x="792.66" y="662.65" text-anchor="middle" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">5</text></g>
|
||||||
|
</g><g clip-path="url(#c39)">
|
||||||
|
<rect x="536.85" y="425.11" width="365.44" height="73.09" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<rect x="553.46" y="427.72" width="1.74" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6D854;"/>
|
||||||
|
<rect x="553.46" y="445.12" width="6.59" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6D854;"/>
|
||||||
|
<rect x="553.46" y="462.53" width="2.27" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6D854;"/>
|
||||||
|
<rect x="553.46" y="479.93" width="4.25" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6D854;"/>
|
||||||
|
<rect x="555.20" y="427.72" width="11.96" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #E78AC3;"/>
|
||||||
|
<rect x="560.05" y="445.12" width="8.82" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #E78AC3;"/>
|
||||||
|
<rect x="555.74" y="462.53" width="17.26" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #E78AC3;"/>
|
||||||
|
<rect x="557.72" y="479.93" width="16.85" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #E78AC3;"/>
|
||||||
|
<rect x="567.16" y="427.72" width="43.23" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #8DA0CB;"/>
|
||||||
|
<rect x="568.87" y="445.12" width="31.53" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #8DA0CB;"/>
|
||||||
|
<rect x="572.99" y="462.53" width="43.96" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #8DA0CB;"/>
|
||||||
|
<rect x="574.56" y="479.93" width="40.32" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #8DA0CB;"/>
|
||||||
|
<rect x="610.40" y="427.72" width="122.46" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FC8D62;"/>
|
||||||
|
<rect x="600.40" y="445.12" width="130.24" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FC8D62;"/>
|
||||||
|
<rect x="616.95" y="462.53" width="122.77" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FC8D62;"/>
|
||||||
|
<rect x="614.88" y="479.93" width="104.95" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FC8D62;"/>
|
||||||
|
<rect x="732.85" y="427.72" width="152.83" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #66C2A5;"/>
|
||||||
|
<rect x="730.63" y="445.12" width="155.05" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #66C2A5;"/>
|
||||||
|
<rect x="739.72" y="462.53" width="145.96" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #66C2A5;"/>
|
||||||
|
<rect x="719.83" y="479.93" width="165.84" height="15.66" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #66C2A5;"/>
|
||||||
|
<g><text x="588.78" y="439.49" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.13</text></g>
|
||||||
|
<g><text x="594.97" y="474.30" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.13</text></g>
|
||||||
|
<g><text x="594.72" y="491.70" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.12</text></g>
|
||||||
|
<g><text x="671.62" y="439.49" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.37</text></g>
|
||||||
|
<g><text x="665.51" y="456.89" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.39</text></g>
|
||||||
|
<g><text x="678.34" y="474.30" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.37</text></g>
|
||||||
|
<g><text x="667.36" y="491.70" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.32</text></g>
|
||||||
|
<g><text x="809.26" y="439.49" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.46</text></g>
|
||||||
|
<g><text x="808.15" y="456.89" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.47</text></g>
|
||||||
|
<g><text x="812.70" y="474.30" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.44</text></g>
|
||||||
|
<g><text x="802.76" y="491.70" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="15.57px" lengthAdjust="spacingAndGlyphs">0.5</text></g>
|
||||||
|
</g><g clip-path="url(#c40)">
|
||||||
|
<polyline points="536.85,498.20 536.85,425.11" style="stroke-width: 1.07;stroke-linecap: butt;"/>
|
||||||
|
<g><text x="531.92" y="490.90" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="90.48px" lengthAdjust="spacingAndGlyphs">Baldock2019_Reading</text></g>
|
||||||
|
<g><text x="531.92" y="473.50" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="80.74px" lengthAdjust="spacingAndGlyphs">Baldock2019_Leeds</text></g>
|
||||||
|
<g><text x="531.92" y="456.10" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="99.91px" lengthAdjust="spacingAndGlyphs">Baldock2019_Edinburgh</text></g>
|
||||||
|
<g><text x="531.92" y="438.70" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="83.19px" lengthAdjust="spacingAndGlyphs">Baldock2019_Bristol</text></g>
|
||||||
|
<polyline points="534.11,487.76 536.85,487.76" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="534.11,470.36 536.85,470.36" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="534.11,452.96 536.85,452.96" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="534.11,435.55 536.85,435.55" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="536.85,498.20 902.29,498.20" style="stroke-width: 1.07;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="885.68,500.94 885.68,498.20" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="802.62,500.94 802.62,498.20" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="719.57,500.94 719.57,498.20" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="636.52,500.94 636.52,498.20" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="553.46,500.94 553.46,498.20" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<g><text x="885.68" y="509.42" text-anchor="middle" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.00</text></g>
|
||||||
|
<g><text x="802.62" y="509.42" text-anchor="middle" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.25</text></g>
|
||||||
|
<g><text x="719.57" y="509.42" text-anchor="middle" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.50</text></g>
|
||||||
|
<g><text x="636.52" y="509.42" text-anchor="middle" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.75</text></g>
|
||||||
|
<g><text x="553.46" y="509.42" text-anchor="middle" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">1.00</text></g>
|
||||||
|
<g><text x="719.57" y="522.13" text-anchor="middle" style="font-family: Arimo;font-size: 11.00px;" textLength="104.64px" lengthAdjust="spacingAndGlyphs">Column proportions</text></g>
|
||||||
|
</g><g clip-path="url(#c41)">
|
||||||
|
<rect x="46.58" y="547.78" width="268.57" height="89.52" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<rect x="46.58" y="547.78" width="61.98" height="2.49" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #B2DF8A;"/>
|
||||||
|
<rect x="115.45" y="547.78" width="61.98" height="4.16" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #B2DF8A;"/>
|
||||||
|
<rect x="184.31" y="547.78" width="61.98" height="4.26" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #B2DF8A;"/>
|
||||||
|
<rect x="253.17" y="547.78" width="61.98" height="1.60" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #B2DF8A;"/>
|
||||||
|
<rect x="46.58" y="550.26" width="61.98" height="8.69" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #1F78B4;"/>
|
||||||
|
<rect x="115.45" y="551.94" width="61.98" height="10.41" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #1F78B4;"/>
|
||||||
|
<rect x="184.31" y="552.03" width="61.98" height="15.62" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #1F78B4;"/>
|
||||||
|
<rect x="253.17" y="549.38" width="61.98" height="12.68" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #1F78B4;"/>
|
||||||
|
<rect x="46.58" y="558.95" width="61.98" height="78.35" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6CEE3;"/>
|
||||||
|
<rect x="115.45" y="562.35" width="61.98" height="74.95" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6CEE3;"/>
|
||||||
|
<rect x="184.31" y="567.65" width="61.98" height="69.65" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6CEE3;"/>
|
||||||
|
<rect x="253.17" y="562.06" width="61.98" height="75.24" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6CEE3;"/>
|
||||||
|
<g><text x="146.43" y="561.08" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.12</text></g>
|
||||||
|
<g><text x="215.30" y="563.78" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.17</text></g>
|
||||||
|
<g><text x="284.16" y="559.66" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.14</text></g>
|
||||||
|
<g><text x="77.57" y="602.07" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.88</text></g>
|
||||||
|
<g><text x="146.43" y="603.76" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.84</text></g>
|
||||||
|
<g><text x="215.30" y="606.41" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.78</text></g>
|
||||||
|
<g><text x="284.16" y="603.62" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.84</text></g>
|
||||||
|
</g><g clip-path="url(#c42)">
|
||||||
|
<polyline points="46.58,637.30 46.58,547.78" style="stroke-width: 1.07;stroke-linecap: butt;"/>
|
||||||
|
<g><text x="41.65" y="640.44" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.00</text></g>
|
||||||
|
<g><text x="41.65" y="618.06" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.25</text></g>
|
||||||
|
<g><text x="41.65" y="595.68" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.50</text></g>
|
||||||
|
<g><text x="41.65" y="573.30" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.75</text></g>
|
||||||
|
<g><text x="41.65" y="550.92" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">1.00</text></g>
|
||||||
|
<polyline points="43.84,637.30 46.58,637.30" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="43.84,614.92 46.58,614.92" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="43.84,592.54 46.58,592.54" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="43.84,570.16 46.58,570.16" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="43.84,547.78 46.58,547.78" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<g><text transform="translate(18.81,592.54) rotate(-90.00)" text-anchor="middle" style="font-family: Arimo;font-size: 11.00px;" textLength="86.46px" lengthAdjust="spacingAndGlyphs">Row proportions</text></g>
|
||||||
|
<polyline points="46.58,637.30 315.15,637.30" style="stroke-width: 1.07;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="77.57,640.04 77.57,637.30" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="146.43,640.04 146.43,637.30" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="215.30,640.04 215.30,637.30" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="284.16,640.04 284.16,637.30" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<g><text transform="translate(80.71,642.23) rotate(-90.00)" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="83.19px" lengthAdjust="spacingAndGlyphs">Baldock2019_Bristol</text></g>
|
||||||
|
<g><text transform="translate(149.58,642.23) rotate(-90.00)" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="99.91px" lengthAdjust="spacingAndGlyphs">Baldock2019_Edinburgh</text></g>
|
||||||
|
<g><text transform="translate(218.44,642.23) rotate(-90.00)" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="80.74px" lengthAdjust="spacingAndGlyphs">Baldock2019_Leeds</text></g>
|
||||||
|
<g><text transform="translate(287.31,642.23) rotate(-90.00)" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="90.48px" lengthAdjust="spacingAndGlyphs">Baldock2019_Reading</text></g>
|
||||||
|
<rect x="999.10" y="393.57" width="63.50" height="123.13" style="stroke-width: 2.13;stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<g><text x="1006.57" y="413.56" style="font-family: Arimo;font-size: 15.00px;" textLength="39.20px" lengthAdjust="spacingAndGlyphs">alpha</text></g>
|
||||||
|
<g><image x="1006.57" y="422.83" width="17.28" height="86.40" preserveAspectRatio="none" xlink:href="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAEsCAYAAAACUNnVAAAAeUlEQVQ4jcWPSw6AMAhEH2O9/5F1Y2pSARtL4oYA82HggEPsxlWaEM0QmyEkp+uj3WMvFnUTo7vjSRkVOQUEEZDLzBytJ3u/EbrMAHXkYYfzUbxLXyVV1LhMp59UjGhotRrthxhFpim69GVN8OjGR5c4ZKFLYeaEdwL6pgZTBMOKRwAAAABJRU5ErkJggg=="/></g>
|
||||||
|
<polyline points="1020.40,509.09 1023.85,509.09" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="1020.40,487.56 1023.85,487.56" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="1020.40,466.03 1023.85,466.03" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="1020.40,444.50 1023.85,444.50" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="1020.40,422.97 1023.85,422.97" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="1010.03,509.09 1006.57,509.09" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="1010.03,487.56 1006.57,487.56" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="1010.03,466.03 1006.57,466.03" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="1010.03,444.50 1006.57,444.50" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="1010.03,422.97 1006.57,422.97" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
|
||||||
|
<g><text x="1031.32" y="513.37" style="font-family: Arimo;font-size: 12.00px;" textLength="23.80px" lengthAdjust="spacingAndGlyphs">0.00</text></g>
|
||||||
|
<g><text x="1031.32" y="491.84" style="font-family: Arimo;font-size: 12.00px;" textLength="23.80px" lengthAdjust="spacingAndGlyphs">0.25</text></g>
|
||||||
|
<g><text x="1031.32" y="470.31" style="font-family: Arimo;font-size: 12.00px;" textLength="23.80px" lengthAdjust="spacingAndGlyphs">0.50</text></g>
|
||||||
|
<g><text x="1031.32" y="448.79" style="font-family: Arimo;font-size: 12.00px;" textLength="23.80px" lengthAdjust="spacingAndGlyphs">0.75</text></g>
|
||||||
|
<g><text x="1031.32" y="427.26" style="font-family: Arimo;font-size: 12.00px;" textLength="23.80px" lengthAdjust="spacingAndGlyphs">1.00</text></g>
|
||||||
|
<rect x="999.10" y="527.66" width="81.55" height="78.78" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<g><text x="1004.58" y="542.32" style="font-family: Arimo;font-size: 11.00px;" textLength="70.59px" lengthAdjust="spacingAndGlyphs">Column block</text></g>
|
||||||
|
<rect x="1004.58" y="549.12" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<rect x="1005.29" y="549.83" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6D854;"/>
|
||||||
|
<rect x="1037.85" y="549.12" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<rect x="1038.56" y="549.83" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #E78AC3;"/>
|
||||||
|
<rect x="1004.58" y="566.40" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<rect x="1005.29" y="567.11" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #8DA0CB;"/>
|
||||||
|
<rect x="1037.85" y="566.40" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<rect x="1038.56" y="567.11" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FC8D62;"/>
|
||||||
|
<rect x="1004.58" y="583.68" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<rect x="1005.29" y="584.39" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #66C2A5;"/>
|
||||||
|
<g><text x="1027.34" y="560.90" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">1</text></g>
|
||||||
|
<g><text x="1060.61" y="560.90" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">2</text></g>
|
||||||
|
<g><text x="1027.34" y="578.18" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">3</text></g>
|
||||||
|
<g><text x="1060.61" y="578.18" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">4</text></g>
|
||||||
|
<g><text x="1027.34" y="595.46" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">5</text></g>
|
||||||
|
<rect x="999.10" y="617.40" width="72.02" height="61.50" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<g><text x="1004.58" y="632.05" style="font-family: Arimo;font-size: 11.00px;" textLength="52.41px" lengthAdjust="spacingAndGlyphs">Row block</text></g>
|
||||||
|
<rect x="1004.58" y="638.85" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<rect x="1005.29" y="639.56" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #B2DF8A;"/>
|
||||||
|
<rect x="1037.85" y="638.85" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<rect x="1038.56" y="639.56" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #1F78B4;"/>
|
||||||
|
<rect x="1004.58" y="656.13" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<rect x="1005.29" y="656.84" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6CEE3;"/>
|
||||||
|
<g><text x="1027.34" y="650.64" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">1</text></g>
|
||||||
|
<g><text x="1060.61" y="650.64" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">2</text></g>
|
||||||
|
<g><text x="1027.34" y="667.92" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">3</text></g>
|
||||||
|
<g><text x="545.80" y="14.90" text-anchor="middle" style="font-family: Arimo;font-size: 13.20px;" textLength="83.92px" lengthAdjust="spacingAndGlyphs">Best partition</text></g>
|
||||||
|
</g>
|
||||||
|
</svg>
|
||||||
|
After Width: | Height: | Size: 50 KiB |
272
suivi/2025-18/figs/partition-pirho.svg
Normal file
|
|
@ -0,0 +1,272 @@
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" class="httpgd" width="1483.00" height="992.00" viewBox="0 0 1186.40 793.60">
|
||||||
|
<defs>
|
||||||
|
<style type='text/css'><![CDATA[
|
||||||
|
.httpgd line, .httpgd polyline, .httpgd polygon, .httpgd path, .httpgd rect, .httpgd circle {
|
||||||
|
fill: none;
|
||||||
|
stroke: #000000;
|
||||||
|
stroke-linecap: round;
|
||||||
|
stroke-linejoin: round;
|
||||||
|
stroke-miterlimit: 10.00;
|
||||||
|
}
|
||||||
|
]]></style>
|
||||||
|
<clipPath id="c0"><rect x="0.00" y="0.00" width="1186.40" height="793.60"/></clipPath>
|
||||||
|
<clipPath id="c1"><rect x="46.58" y="10.96" width="1034.36" height="608.79"/></clipPath>
|
||||||
|
<clipPath id="c2"><rect x="0.00" y="0.00" width="1186.40" height="793.60"/></clipPath>
|
||||||
|
<clipPath id="c3"><rect x="338.90" y="270.72" width="842.02" height="517.40"/></clipPath>
|
||||||
|
<clipPath id="c4"><rect x="0.00" y="0.00" width="1186.40" height="793.60"/></clipPath>
|
||||||
|
<clipPath id="c5"><rect x="338.90" y="5.48" width="842.02" height="265.24"/></clipPath>
|
||||||
|
<clipPath id="c6"><rect x="0.00" y="0.00" width="1186.40" height="793.60"/></clipPath>
|
||||||
|
<clipPath id="c7"><rect x="5.48" y="270.72" width="333.42" height="517.40"/></clipPath>
|
||||||
|
<clipPath id="c8"><rect x="0.00" y="0.00" width="1186.40" height="793.60"/></clipPath>
|
||||||
|
<clipPath id="c9"><rect x="509.47" y="278.19" width="569.27" height="341.56"/></clipPath>
|
||||||
|
<clipPath id="c10"><rect x="0.00" y="0.00" width="1186.40" height="793.60"/></clipPath>
|
||||||
|
<clipPath id="c11"><rect x="507.27" y="67.45" width="573.68" height="114.74"/></clipPath>
|
||||||
|
<clipPath id="c12"><rect x="0.00" y="0.00" width="1186.40" height="793.60"/></clipPath>
|
||||||
|
<clipPath id="c13"><rect x="46.58" y="401.16" width="286.84" height="95.61"/></clipPath>
|
||||||
|
<clipPath id="c14"><rect x="0.00" y="0.00" width="1186.40" height="793.60"/></clipPath>
|
||||||
|
</defs>
|
||||||
|
<rect width="100%" height="100%" style="stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<g clip-path="url(#c0)">
|
||||||
|
<rect x="-0.00" y="0.00" width="1186.40" height="793.60" style="stroke-width: 1.07;stroke: #FFFFFF;fill: #FFFFFF;"/>
|
||||||
|
</g><g clip-path="url(#c3)">
|
||||||
|
<rect x="338.90" y="270.72" width="842.02" height="517.40" style="stroke-width: 2.13;stroke: #FFFFFF;fill: #FFFFFF;"/>
|
||||||
|
</g><g clip-path="url(#c5)">
|
||||||
|
<rect x="338.90" y="5.48" width="842.02" height="265.24" style="stroke-width: 1.07;stroke: #FFFFFF;fill: #FFFFFF;"/>
|
||||||
|
</g><g clip-path="url(#c7)">
|
||||||
|
<rect x="5.48" y="270.72" width="333.42" height="517.40" style="stroke-width: 1.07;stroke: #FFFFFF;fill: #FFFFFF;"/>
|
||||||
|
</g><g clip-path="url(#c9)">
|
||||||
|
<rect x="509.47" y="278.19" width="569.27" height="341.56" style="stroke-width: 2.13;stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<polyline points="509.47,278.19 1078.74,278.19" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="509.47,392.04 1078.74,392.04" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="509.47,505.90 1078.74,505.90" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="509.47,619.75 1078.74,619.75" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="509.47,619.75 509.47,278.19" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="623.32,619.75 623.32,278.19" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="737.18,619.75 737.18,278.19" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="851.03,619.75 851.03,278.19" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="964.89,619.75 964.89,278.19" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="1078.74,619.75 1078.74,278.19" style="stroke-width: 1.07;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="509.47,335.12 1078.74,335.12" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="509.47,448.97 1078.74,448.97" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="509.47,562.82 1078.74,562.82" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="566.40,619.75 566.40,278.19" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="680.25,619.75 680.25,278.19" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="794.10,619.75 794.10,278.19" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="907.96,619.75 907.96,278.19" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="1021.81,619.75 1021.81,278.19" style="stroke-width: 2.13;stroke: #EBEBEB;stroke-linecap: butt;"/>
|
||||||
|
<rect x="509.47" y="278.19" width="113.85" height="113.85" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFC1AC;"/>
|
||||||
|
<rect x="623.32" y="278.19" width="113.85" height="113.85" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFA98E;"/>
|
||||||
|
<rect x="737.18" y="278.19" width="113.85" height="113.85" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFBAA4;"/>
|
||||||
|
<rect x="851.03" y="278.19" width="113.85" height="113.85" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFECE5;"/>
|
||||||
|
<rect x="964.89" y="278.19" width="113.85" height="113.85" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFBBA5;"/>
|
||||||
|
<rect x="509.47" y="392.04" width="113.85" height="113.85" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FF9A7D;"/>
|
||||||
|
<rect x="623.32" y="392.04" width="113.85" height="113.85" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFC6B3;"/>
|
||||||
|
<rect x="737.18" y="392.04" width="113.85" height="113.85" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFDDD0;"/>
|
||||||
|
<rect x="851.03" y="392.04" width="113.85" height="113.85" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFF8F5;"/>
|
||||||
|
<rect x="964.89" y="392.04" width="113.85" height="113.85" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFFAF9;"/>
|
||||||
|
<rect x="509.47" y="505.90" width="113.85" height="113.85" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFDBCF;"/>
|
||||||
|
<rect x="623.32" y="505.90" width="113.85" height="113.85" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFF2ED;"/>
|
||||||
|
<rect x="737.18" y="505.90" width="113.85" height="113.85" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFFCFB;"/>
|
||||||
|
<rect x="851.03" y="505.90" width="113.85" height="113.85" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFFEFD;"/>
|
||||||
|
<rect x="964.89" y="505.90" width="113.85" height="113.85" style="stroke-width: 0.21;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FFFFFF;"/>
|
||||||
|
<line x1="509.47" y1="392.04" x2="1078.74" y2="392.04" style="stroke-width: 1.07;stroke-linecap: butt;"/>
|
||||||
|
<line x1="509.47" y1="505.90" x2="1078.74" y2="505.90" style="stroke-width: 1.07;stroke-linecap: butt;"/>
|
||||||
|
<line x1="509.47" y1="619.75" x2="1078.74" y2="619.75" style="stroke-width: 1.07;stroke-linecap: butt;"/>
|
||||||
|
<line x1="623.32" y1="619.75" x2="623.32" y2="278.19" style="stroke-width: 1.07;stroke-linecap: butt;"/>
|
||||||
|
<line x1="737.18" y1="619.75" x2="737.18" y2="278.19" style="stroke-width: 1.07;stroke-linecap: butt;"/>
|
||||||
|
<line x1="851.03" y1="619.75" x2="851.03" y2="278.19" style="stroke-width: 1.07;stroke-linecap: butt;"/>
|
||||||
|
<line x1="964.89" y1="619.75" x2="964.89" y2="278.19" style="stroke-width: 1.07;stroke-linecap: butt;"/>
|
||||||
|
<line x1="1078.74" y1="619.75" x2="1078.74" y2="278.19" style="stroke-width: 1.07;stroke-linecap: butt;"/>
|
||||||
|
<g><text x="566.40" y="339.05" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.32</text></g>
|
||||||
|
<g><text x="680.25" y="339.05" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.45</text></g>
|
||||||
|
<g><text x="794.10" y="339.05" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.36</text></g>
|
||||||
|
<g><text x="907.96" y="339.05" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="15.57px" lengthAdjust="spacingAndGlyphs">0.1</text></g>
|
||||||
|
<g><text x="1021.81" y="339.05" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.35</text></g>
|
||||||
|
<g><text x="566.40" y="452.91" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.52</text></g>
|
||||||
|
<g><text x="680.25" y="452.91" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="15.57px" lengthAdjust="spacingAndGlyphs">0.3</text></g>
|
||||||
|
<g><text x="794.10" y="452.91" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.18</text></g>
|
||||||
|
<g><text x="907.96" y="452.91" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.04</text></g>
|
||||||
|
<g><text x="1021.81" y="452.91" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.02</text></g>
|
||||||
|
<g><text x="566.40" y="566.76" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.19</text></g>
|
||||||
|
<g><text x="680.25" y="566.76" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.07</text></g>
|
||||||
|
<g><text x="794.10" y="566.76" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.02</text></g>
|
||||||
|
<g><text x="907.96" y="566.76" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.01</text></g>
|
||||||
|
<g><text x="1021.81" y="566.76" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="6.31px" lengthAdjust="spacingAndGlyphs">0</text></g>
|
||||||
|
<rect x="509.47" y="278.19" width="569.27" height="341.56" style="stroke-width: 2.13;stroke: #333333;"/>
|
||||||
|
</g><g clip-path="url(#c10)">
|
||||||
|
<g><text x="502.74" y="339.40" text-anchor="end" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">1</text></g>
|
||||||
|
<g><text x="502.74" y="453.25" text-anchor="end" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">2</text></g>
|
||||||
|
<g><text x="502.74" y="567.11" text-anchor="end" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">3</text></g>
|
||||||
|
<polyline points="505.73,335.12 509.47,335.12" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="505.73,448.97 509.47,448.97" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="505.73,562.82 509.47,562.82" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="566.40,623.49 566.40,619.75" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="680.25,623.49 680.25,619.75" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="794.10,623.49 794.10,619.75" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="907.96,623.49 907.96,619.75" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="1021.81,623.49 1021.81,619.75" style="stroke-width: 2.13;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<g><text x="566.40" y="635.04" text-anchor="middle" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">1</text></g>
|
||||||
|
<g><text x="680.25" y="635.04" text-anchor="middle" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">2</text></g>
|
||||||
|
<g><text x="794.10" y="635.04" text-anchor="middle" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">3</text></g>
|
||||||
|
<g><text x="907.96" y="635.04" text-anchor="middle" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">4</text></g>
|
||||||
|
<g><text x="1021.81" y="635.04" text-anchor="middle" style="font-family: Arimo;font-size: 12.00px;fill: #4D4D4D;" textLength="6.86px" lengthAdjust="spacingAndGlyphs">5</text></g>
|
||||||
|
</g><g clip-path="url(#c11)">
|
||||||
|
<rect x="507.27" y="67.45" width="573.68" height="114.74" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<rect x="533.34" y="70.76" width="0.00" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6D854;"/>
|
||||||
|
<rect x="533.34" y="92.82" width="2.72" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6D854;"/>
|
||||||
|
<rect x="533.34" y="114.88" width="11.11" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6D854;"/>
|
||||||
|
<rect x="533.34" y="136.95" width="3.40" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6D854;"/>
|
||||||
|
<rect x="533.34" y="159.01" width="6.61" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6D854;"/>
|
||||||
|
<rect x="533.34" y="70.76" width="12.01" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #E78AC3;"/>
|
||||||
|
<rect x="536.06" y="92.82" width="18.08" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #E78AC3;"/>
|
||||||
|
<rect x="544.45" y="114.88" width="12.26" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #E78AC3;"/>
|
||||||
|
<rect x="536.74" y="136.95" width="26.92" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #E78AC3;"/>
|
||||||
|
<rect x="539.95" y="159.01" width="25.23" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #E78AC3;"/>
|
||||||
|
<rect x="545.35" y="70.76" width="11.80" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #8DA0CB;"/>
|
||||||
|
<rect x="554.14" y="92.82" width="68.70" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #8DA0CB;"/>
|
||||||
|
<rect x="556.71" y="114.88" width="38.34" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #8DA0CB;"/>
|
||||||
|
<rect x="563.67" y="136.95" width="68.82" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #8DA0CB;"/>
|
||||||
|
<rect x="565.19" y="159.01" width="67.52" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #8DA0CB;"/>
|
||||||
|
<rect x="557.15" y="70.76" width="47.64" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FC8D62;"/>
|
||||||
|
<rect x="622.85" y="92.82" width="317.75" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FC8D62;"/>
|
||||||
|
<rect x="595.06" y="114.88" width="388.85" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FC8D62;"/>
|
||||||
|
<rect x="632.49" y="136.95" width="328.64" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FC8D62;"/>
|
||||||
|
<rect x="632.71" y="159.01" width="216.63" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FC8D62;"/>
|
||||||
|
<rect x="604.79" y="70.76" width="450.07" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #66C2A5;"/>
|
||||||
|
<rect x="940.60" y="92.82" width="114.27" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #66C2A5;"/>
|
||||||
|
<rect x="983.90" y="114.88" width="70.96" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #66C2A5;"/>
|
||||||
|
<rect x="961.14" y="136.95" width="93.73" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #66C2A5;"/>
|
||||||
|
<rect x="849.35" y="159.01" width="205.52" height="19.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #66C2A5;"/>
|
||||||
|
<g><text x="588.49" y="106.69" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.13</text></g>
|
||||||
|
<g><text x="598.08" y="150.82" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.13</text></g>
|
||||||
|
<g><text x="598.95" y="172.88" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.13</text></g>
|
||||||
|
<g><text x="781.72" y="106.69" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.61</text></g>
|
||||||
|
<g><text x="789.48" y="128.75" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.75</text></g>
|
||||||
|
<g><text x="796.81" y="150.82" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.63</text></g>
|
||||||
|
<g><text x="741.03" y="172.88" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.42</text></g>
|
||||||
|
<g><text x="829.83" y="84.62" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.86</text></g>
|
||||||
|
<g><text x="997.73" y="106.69" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.22</text></g>
|
||||||
|
<g><text x="1019.39" y="128.75" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.14</text></g>
|
||||||
|
<g><text x="1008.00" y="150.82" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.18</text></g>
|
||||||
|
<g><text x="952.11" y="172.88" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.39</text></g>
|
||||||
|
</g><g clip-path="url(#c12)">
|
||||||
|
<polyline points="507.27,182.18 507.27,67.45" style="stroke-width: 1.07;stroke-linecap: butt;"/>
|
||||||
|
<g><text x="502.34" y="172.08" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="90.48px" lengthAdjust="spacingAndGlyphs">Baldock2019_Reading</text></g>
|
||||||
|
<g><text x="502.34" y="150.02" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="80.74px" lengthAdjust="spacingAndGlyphs">Baldock2019_Leeds</text></g>
|
||||||
|
<g><text x="502.34" y="127.96" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="99.91px" lengthAdjust="spacingAndGlyphs">Baldock2019_Edinburgh</text></g>
|
||||||
|
<g><text x="502.34" y="105.89" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="83.19px" lengthAdjust="spacingAndGlyphs">Baldock2019_Bristol</text></g>
|
||||||
|
<g><text x="502.34" y="83.83" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="137.92px" lengthAdjust="spacingAndGlyphs">Baldock2011_TB+Baldock2011_JN</text></g>
|
||||||
|
<polyline points="504.53,168.94 507.27,168.94" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="504.53,146.88 507.27,146.88" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="504.53,124.81 507.27,124.81" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="504.53,102.75 507.27,102.75" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="504.53,80.68 507.27,80.68" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="507.27,182.18 1080.94,182.18" style="stroke-width: 1.07;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="1054.87,184.92 1054.87,182.18" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="924.49,184.92 924.49,182.18" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="794.10,184.92 794.10,182.18" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="663.72,184.92 663.72,182.18" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="533.34,184.92 533.34,182.18" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<g><text x="1054.87" y="193.40" text-anchor="middle" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.00</text></g>
|
||||||
|
<g><text x="924.49" y="193.40" text-anchor="middle" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.25</text></g>
|
||||||
|
<g><text x="794.10" y="193.40" text-anchor="middle" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.50</text></g>
|
||||||
|
<g><text x="663.72" y="193.40" text-anchor="middle" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.75</text></g>
|
||||||
|
<g><text x="533.34" y="193.40" text-anchor="middle" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">1.00</text></g>
|
||||||
|
<g><text x="794.10" y="206.11" text-anchor="middle" style="font-family: Arimo;font-size: 11.00px;" textLength="104.64px" lengthAdjust="spacingAndGlyphs">Column proportions</text></g>
|
||||||
|
</g><g clip-path="url(#c13)">
|
||||||
|
<rect x="46.58" y="401.16" width="286.84" height="95.61" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<rect x="46.58" y="401.16" width="52.68" height="1.59" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #B2DF8A;"/>
|
||||||
|
<rect x="105.12" y="401.16" width="52.68" height="2.13" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #B2DF8A;"/>
|
||||||
|
<rect x="163.66" y="401.16" width="52.68" height="4.46" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #B2DF8A;"/>
|
||||||
|
<rect x="222.20" y="401.16" width="52.68" height="3.80" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #B2DF8A;"/>
|
||||||
|
<rect x="280.73" y="401.16" width="52.68" height="1.15" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #B2DF8A;"/>
|
||||||
|
<rect x="46.58" y="402.76" width="52.68" height="94.02" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #1F78B4;"/>
|
||||||
|
<rect x="105.12" y="403.29" width="52.68" height="9.23" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #1F78B4;"/>
|
||||||
|
<rect x="163.66" y="405.62" width="52.68" height="10.37" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #1F78B4;"/>
|
||||||
|
<rect x="222.20" y="404.96" width="52.68" height="17.41" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #1F78B4;"/>
|
||||||
|
<rect x="280.73" y="402.31" width="52.68" height="14.17" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #1F78B4;"/>
|
||||||
|
<rect x="46.58" y="496.78" width="52.68" height="0.00" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6CEE3;"/>
|
||||||
|
<rect x="105.12" y="412.52" width="52.68" height="84.25" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6CEE3;"/>
|
||||||
|
<rect x="163.66" y="416.00" width="52.68" height="80.78" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6CEE3;"/>
|
||||||
|
<rect x="222.20" y="422.37" width="52.68" height="74.41" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6CEE3;"/>
|
||||||
|
<rect x="280.73" y="416.48" width="52.68" height="80.30" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6CEE3;"/>
|
||||||
|
<g><text x="72.92" y="453.71" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.98</text></g>
|
||||||
|
<g><text x="190.00" y="414.75" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.11</text></g>
|
||||||
|
<g><text x="248.54" y="417.60" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.18</text></g>
|
||||||
|
<g><text x="307.08" y="413.33" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.15</text></g>
|
||||||
|
<g><text x="131.46" y="458.59" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.88</text></g>
|
||||||
|
<g><text x="190.00" y="460.33" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.84</text></g>
|
||||||
|
<g><text x="248.54" y="463.51" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.78</text></g>
|
||||||
|
<g><text x="307.08" y="460.57" text-anchor="middle" style="font-family: Arimo;font-size: 11.04px;" textLength="21.88px" lengthAdjust="spacingAndGlyphs">0.84</text></g>
|
||||||
|
</g><g clip-path="url(#c14)">
|
||||||
|
<polyline points="46.58,496.78 46.58,401.16" style="stroke-width: 1.07;stroke-linecap: butt;"/>
|
||||||
|
<g><text x="41.65" y="499.92" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.00</text></g>
|
||||||
|
<g><text x="41.65" y="476.02" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.25</text></g>
|
||||||
|
<g><text x="41.65" y="452.11" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.50</text></g>
|
||||||
|
<g><text x="41.65" y="428.21" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">0.75</text></g>
|
||||||
|
<g><text x="41.65" y="404.31" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="17.45px" lengthAdjust="spacingAndGlyphs">1.00</text></g>
|
||||||
|
<polyline points="43.84,496.78 46.58,496.78" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="43.84,472.87 46.58,472.87" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="43.84,448.97 46.58,448.97" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="43.84,425.07 46.58,425.07" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="43.84,401.16 46.58,401.16" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<g><text transform="translate(18.81,448.97) rotate(-90.00)" text-anchor="middle" style="font-family: Arimo;font-size: 11.00px;" textLength="86.46px" lengthAdjust="spacingAndGlyphs">Row proportions</text></g>
|
||||||
|
<polyline points="46.58,496.78 333.42,496.78" style="stroke-width: 1.07;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="72.92,499.52 72.92,496.78" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="131.46,499.52 131.46,496.78" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="190.00,499.52 190.00,496.78" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="248.54,499.52 248.54,496.78" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="307.08,499.52 307.08,496.78" style="stroke-width: 1.07;stroke: #333333;stroke-linecap: butt;"/>
|
||||||
|
<g><text transform="translate(76.07,501.71) rotate(-90.00)" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="137.92px" lengthAdjust="spacingAndGlyphs">Baldock2011_TB+Baldock2011_JN</text></g>
|
||||||
|
<g><text transform="translate(134.60,501.71) rotate(-90.00)" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="83.19px" lengthAdjust="spacingAndGlyphs">Baldock2019_Bristol</text></g>
|
||||||
|
<g><text transform="translate(193.14,501.71) rotate(-90.00)" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="99.91px" lengthAdjust="spacingAndGlyphs">Baldock2019_Edinburgh</text></g>
|
||||||
|
<g><text transform="translate(251.68,501.71) rotate(-90.00)" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="80.74px" lengthAdjust="spacingAndGlyphs">Baldock2019_Leeds</text></g>
|
||||||
|
<g><text transform="translate(310.22,501.71) rotate(-90.00)" text-anchor="end" style="font-family: Arimo;font-size: 8.80px;fill: #4D4D4D;" textLength="90.48px" lengthAdjust="spacingAndGlyphs">Baldock2019_Reading</text></g>
|
||||||
|
<rect x="1091.90" y="172.70" width="63.50" height="123.13" style="stroke-width: 2.13;stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<g><text x="1099.37" y="192.68" style="font-family: Arimo;font-size: 15.00px;" textLength="39.20px" lengthAdjust="spacingAndGlyphs">alpha</text></g>
|
||||||
|
<g><image x="1099.37" y="201.95" width="17.28" height="86.40" preserveAspectRatio="none" xlink:href="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAEsCAYAAAACUNnVAAAAeUlEQVQ4jcWPSw6AMAhEH2O9/5F1Y2pSARtL4oYA82HggEPsxlWaEM0QmyEkp+uj3WMvFnUTo7vjSRkVOQUEEZDLzBytJ3u/EbrMAHXkYYfzUbxLXyVV1LhMp59UjGhotRrthxhFpim69GVN8OjGR5c4ZKFLYeaEdwL6pgZTBMOKRwAAAABJRU5ErkJggg=="/></g>
|
||||||
|
<polyline points="1113.20,288.21 1116.65,288.21" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="1113.20,266.68 1116.65,266.68" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="1113.20,245.15 1116.65,245.15" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="1113.20,223.63 1116.65,223.63" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="1113.20,202.10 1116.65,202.10" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="1102.83,288.21 1099.37,288.21" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="1102.83,266.68 1099.37,266.68" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="1102.83,245.15 1099.37,245.15" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="1102.83,223.63 1099.37,223.63" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
|
||||||
|
<polyline points="1102.83,202.10 1099.37,202.10" style="stroke-width: 0.38;stroke: #FFFFFF;stroke-linecap: butt;"/>
|
||||||
|
<g><text x="1124.12" y="292.49" style="font-family: Arimo;font-size: 12.00px;" textLength="23.80px" lengthAdjust="spacingAndGlyphs">0.00</text></g>
|
||||||
|
<g><text x="1124.12" y="270.97" style="font-family: Arimo;font-size: 12.00px;" textLength="23.80px" lengthAdjust="spacingAndGlyphs">0.25</text></g>
|
||||||
|
<g><text x="1124.12" y="249.44" style="font-family: Arimo;font-size: 12.00px;" textLength="23.80px" lengthAdjust="spacingAndGlyphs">0.50</text></g>
|
||||||
|
<g><text x="1124.12" y="227.91" style="font-family: Arimo;font-size: 12.00px;" textLength="23.80px" lengthAdjust="spacingAndGlyphs">0.75</text></g>
|
||||||
|
<g><text x="1124.12" y="206.38" style="font-family: Arimo;font-size: 12.00px;" textLength="23.80px" lengthAdjust="spacingAndGlyphs">1.00</text></g>
|
||||||
|
<rect x="1091.90" y="306.78" width="81.55" height="78.78" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<g><text x="1097.38" y="321.44" style="font-family: Arimo;font-size: 11.00px;" textLength="70.59px" lengthAdjust="spacingAndGlyphs">Column block</text></g>
|
||||||
|
<rect x="1097.38" y="328.24" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<rect x="1098.09" y="328.95" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6D854;"/>
|
||||||
|
<rect x="1130.65" y="328.24" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<rect x="1131.36" y="328.95" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #E78AC3;"/>
|
||||||
|
<rect x="1097.38" y="345.52" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<rect x="1098.09" y="346.23" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #8DA0CB;"/>
|
||||||
|
<rect x="1130.65" y="345.52" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<rect x="1131.36" y="346.23" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #FC8D62;"/>
|
||||||
|
<rect x="1097.38" y="362.80" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<rect x="1098.09" y="363.51" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #66C2A5;"/>
|
||||||
|
<g><text x="1120.14" y="340.02" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">1</text></g>
|
||||||
|
<g><text x="1153.41" y="340.02" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">2</text></g>
|
||||||
|
<g><text x="1120.14" y="357.30" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">3</text></g>
|
||||||
|
<g><text x="1153.41" y="357.30" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">4</text></g>
|
||||||
|
<g><text x="1120.14" y="374.58" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">5</text></g>
|
||||||
|
<rect x="1091.90" y="396.52" width="72.02" height="61.50" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<g><text x="1097.38" y="411.17" style="font-family: Arimo;font-size: 11.00px;" textLength="52.41px" lengthAdjust="spacingAndGlyphs">Row block</text></g>
|
||||||
|
<rect x="1097.38" y="417.98" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<rect x="1098.09" y="418.68" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #B2DF8A;"/>
|
||||||
|
<rect x="1130.65" y="417.98" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<rect x="1131.36" y="418.68" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #1F78B4;"/>
|
||||||
|
<rect x="1097.38" y="435.26" width="17.28" height="17.28" style="stroke-width: 1.07;stroke: none;fill: #FFFFFF;"/>
|
||||||
|
<rect x="1098.09" y="435.96" width="15.86" height="15.86" style="stroke-width: 1.07;stroke: none;stroke-linecap: butt;stroke-linejoin: miter;fill: #A6CEE3;"/>
|
||||||
|
<g><text x="1120.14" y="429.76" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">1</text></g>
|
||||||
|
<g><text x="1153.41" y="429.76" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">2</text></g>
|
||||||
|
<g><text x="1120.14" y="447.04" style="font-family: Arimo;font-size: 8.80px;" textLength="5.03px" lengthAdjust="spacingAndGlyphs">3</text></g>
|
||||||
|
</g>
|
||||||
|
</svg>
|
||||||
|
After Width: | Height: | Size: 35 KiB |
156
suivi/2025-19/2025-19.qmd
Normal file
|
|
@ -0,0 +1,156 @@
|
||||||
|
---
|
||||||
|
title: "Bilan semaine 19 2025 : 5 mai - 9 mai"
|
||||||
|
categories: [colBiSBM, inférence]
|
||||||
|
date: 2025 05 09
|
||||||
|
---
|
||||||
|
|
||||||
|
## TOP PRIORITÉ
|
||||||
|
|
||||||
|
- Débugguer les simulations :
|
||||||
|
|
||||||
|
- Clustering : Relancer simulations de clustering avec $M = 30$ où $M_i = 10, \forall i$. En attente retour MIGALE
|
||||||
|
Relancer simus clustering avec VEM steps = 10 000 et plus nombreux init pour spectral. Ajouter simu clustering métriques nb sous-collections obtenues.
|
||||||
|
Vérifier les résultats obtenus si ARI = 0. Et augmenter la taille $M = 30$ avec $M_1 = M_2 = M_3 = 10$. ~~-> BUG, dois creuser mais juste des problèmes techniques.~~
|
||||||
|
Le bug venait probablement d'une inadéquation entre la version de *future* et *future.callr*, les résultats temporaires sont encourageants.
|
||||||
|
|
||||||
|
- Inférence : Relancer simus d'inférence avec n = 240 pour voir si la qualité augmente (se rassurer). En fait on est déjà à 240, j'ai relancé avec M = 4 au lieu de M = 2.
|
||||||
|
En attente résultats MIGALE -> BUG, dois creuser mais juste des problèmes techniques -> Visiblement il y a d'autres problèmes que juste le plan de parallélisation.
|
||||||
|
|
||||||
|
### Applications
|
||||||
|
|
||||||
|
- Kmeans sur la densité des réseaux subdoré pour pré-partitionner et *clusteriser*.
|
||||||
|
Car densités déséquilibrées.
|
||||||
|
|
||||||
|
:::{#ref-kmeans-vae}
|
||||||
|
|
||||||
|
- Faire GNN-VAE Doré et sub-Doré avec kmeans et clustering sur l'espace latent
|
||||||
|
J'ai commencé à regarder un peu
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
|
### Présentations LSD, JdS et ML@Aussois
|
||||||
|
|
||||||
|
- ~~PRÉSENTATION JDS (LSD), durée introuvable, adapter en anglais les slides~~ et voir avec PB et SD.
|
||||||
|
- Quel plan ?
|
||||||
|
- Quels résultats ? Baldock, Traveset ... (sub-Doré)
|
||||||
|
|
||||||
|
|
||||||
|
### Inférence et microbes
|
||||||
|
|
||||||
|
- Lire Papiers compositional data (Aitchison et al. intro)
|
||||||
|
- Se renseigner techniques d'inférence de réseaux :
|
||||||
|
- covariance (base corrélation et seuil)
|
||||||
|
- GraphicalLASSO
|
||||||
|
- Co-occurence
|
||||||
|
- Lire article multi-niveaux Saint-Clair
|
||||||
|
|
||||||
|
## A discuter
|
||||||
|
|
||||||
|
- Voir pour TT période du 11 au 14 août
|
||||||
|
- Voir pour date CSI car congés avec parents prévus du 29/08 au 12/09.
|
||||||
|
|
||||||
|
|
||||||
|
## A faire
|
||||||
|
|
||||||
|
### Inférence
|
||||||
|
|
||||||
|
- Papier pour comprendre données
|
||||||
|
- ~~Faust et al.~~
|
||||||
|
- Abdill et al.
|
||||||
|
- Bashan et al.
|
||||||
|
|
||||||
|
- pbs : variance, bcp de zero, covariables, offset et taxonomie (Reseaux arretes differents niveaux : Genre, OTU ...)
|
||||||
|
|
||||||
|
> Combine networks at different taxonomic levels
|
||||||
|
|
||||||
|
- Inférence + GREMLINS
|
||||||
|
|
||||||
|
### Rédaction article
|
||||||
|
|
||||||
|
- Relire intro St Clair
|
||||||
|
- S'inspirer structure pour mon intro
|
||||||
|
- Trouver biblio intro
|
||||||
|
- Rédiger l'intro
|
||||||
|
- Dire résultats nettement meilleurs et variabilités inférieures.
|
||||||
|
|
||||||
|
|
||||||
|
## J'ai fait
|
||||||
|
|
||||||
|
### CSI (en attente contacts PB et SD)
|
||||||
|
|
||||||
|
- Est-ce à moi de contacter Saint-Clair et Sonia/Elisa ? *Pierre et Sophie gèrent*
|
||||||
|
- Pierre Gérard a dit oui, il attend les détails
|
||||||
|
- Quand : *fin juin début juillet*
|
||||||
|
- Liste potentielle :
|
||||||
|
- (Saint-Clair)
|
||||||
|
- Mahendra
|
||||||
|
- Elisa/Sonia
|
||||||
|
- Pierre Gérard
|
||||||
|
|
||||||
|
### Finist'R
|
||||||
|
|
||||||
|
- S'inscrire
|
||||||
|
|
||||||
|
### ML at Aussois
|
||||||
|
|
||||||
|
- S'inscrire avec abstract court
|
||||||
|
- Demander la bourse
|
||||||
|
- Détails d'inscriptions : *Je demande une bourse et je m'inscris avec la demande de bourse, Pierre et Sophie font la lettre de recommandation*
|
||||||
|
|
||||||
|
### Présentation
|
||||||
|
|
||||||
|
- J'ai traduit en anglais ma présentation : [Lien](https://forgemia.inra.fr/louis.lacoste/presentation-colbisbm/-/raw/main/presentation.pdf?ref_type=heads)
|
||||||
|
|
||||||
|
## A continuer
|
||||||
|
|
||||||
|
### Applications
|
||||||
|
|
||||||
|
- Idée Sophie: Regarder clustering de données plantes-pollinisateur selon gradient d'urbanisation
|
||||||
|
|
||||||
|
> Sophie a fait une appli qui marche bien et va dans le sens de l'analyse faite
|
||||||
|
(à savoir pas d'effet du gradient d'urbanisation). À continuer pour l'intégrer dans l'article !
|
||||||
|
|
||||||
|
### Axe inférence
|
||||||
|
|
||||||
|
- Lire biblio fournie Julie, Inférence de réseaux : co-occurence
|
||||||
|
|
||||||
|
> J'ai lu Faust et al.
|
||||||
|
> Je lis Abdill et al.
|
||||||
|
|
||||||
|
## Repoussés ou abandonnés
|
||||||
|
|
||||||
|
- Résultats simus NA **Erreur pour certaines conditions** : Pour NA robustness générer `nb_rep` collections de taille $M=2$ et prélever
|
||||||
|
$\epsilon_{max}n_r n_c$ liens à retirer puis pour les $\epsilon < \epsilon_{max}$ prélever dans la liste des indices afin d'avoir des perturbations emboitées.
|
||||||
|
Il faut que j'ajoute un mécanisme pour reprendre des conditions qui ont planté et que je skip dans le future_lapply les conditions déjà traitées (pour avoir la même seed quand je vais exécuter le code).
|
||||||
|
Implémenté les missing steps.
|
||||||
|
|
||||||
|
> Je n'arrive pas à comprendre les erreurs qui arrivent
|
||||||
|
|
||||||
|
- Lire Biological Networks - François Képès
|
||||||
|
|
||||||
|
- Regarder les applications pour les collections de réseaux recommender system *Pas pertinents et trop gros*
|
||||||
|
|
||||||
|
:::{#lst-reco-systems lst-cap="Recommender systems data"}
|
||||||
|
Par exemple :
|
||||||
|
|
||||||
|
- [Liste de recommendation data](https://cseweb.ucsd.edu/~jmcauley/datasets.html)
|
||||||
|
:::
|
||||||
|
|
||||||
|
|
||||||
|
### Papier plus multi-applications
|
||||||
|
- Données d'Elisa herbivore ?
|
||||||
|
- Données urbanisations ?
|
||||||
|
|
||||||
|
### Autour de l'article et du package
|
||||||
|
|
||||||
|
- Créer des vignettes illustrant par exemple des cas de simulations. **Possible de mettre l'exemple d'application de Sophie sur les réseaux avec gradient d'urbanisation**.
|
||||||
|
|
||||||
|
### Simulations article
|
||||||
|
|
||||||
|
- Comparer sur clustering unipartite avec des versions symétrisées par blocs des matrices d'adjacence.
|
||||||
|
|
||||||
|
- Corriger structure de simus :
|
||||||
|
- Pour noisy $\alpha$ :
|
||||||
|
- Logit pour envoyer la gaussienne vers (0,1)
|
||||||
|
- Beta contrainte dans (0,1)
|
||||||
|
- Pour noisy links : Générer `nb_clustering` collections de taille M puis prélever $\epsilon_{max}n_r n_c$ liens à inverser puis pour les $\epsilon < \epsilon_{max}$ prélever dans la liste des indices afin d'avoir des perturbations emboitées.
|
||||||
178
suivi/2025-20/2025-20.qmd
Normal file
|
|
@ -0,0 +1,178 @@
|
||||||
|
---
|
||||||
|
title: "Bilan semaine 20 2025 : 12 mai - 16 mai"
|
||||||
|
categories: [colBiSBM, inférence, GNN]
|
||||||
|
date: 2025 05 16
|
||||||
|
---
|
||||||
|
|
||||||
|
## TOP PRIORITÉ
|
||||||
|
|
||||||
|
- Pour clustering de collections sur données réelles :
|
||||||
|
- ~~Relâcher la pénalité pour les coupes pour proposer modèles.~~
|
||||||
|
|
||||||
|
→ L'intuition de Pierre semble être confirmée, les dissimilarités semblent arrêter de varier sensiblement pour de grandes valeurs $(Q_1,Q_2)$.
|
||||||
|
- Faire le `hclust` avec diverses distances et voir si les coupes proposées diffèrent sensiblement
|
||||||
|
- Si plusieurs clustering possibles les tester et sélectionner le
|
||||||
|
meilleur
|
||||||
|
- Ré-ajuster les bonnes partitions.
|
||||||
|
- Données simulées tester diverses distances.
|
||||||
|
|
||||||
|
|
||||||
|
- Dé-bugger les simulations :
|
||||||
|
|
||||||
|
- Inférence : Relancer simus d'inférence avec n = 240 pour voir si la qualité augmente (se rassurer). En fait on est déjà à 240, j'ai relancé avec M = 4 au lieu de M = 2.
|
||||||
|
En attente résultats MIGALE -> BUG, dois creuser mais juste des problèmes techniques -> Visiblement il y a d'autres problèmes que juste le plan de parallélisation.
|
||||||
|
|
||||||
|
- Vérifier si problème de version tidyverse pour vapply sur l'**inférence**.
|
||||||
|
|
||||||
|
- Si problème de parallélisation vient de pb de version *future.callr* le signaler à MIGALE.
|
||||||
|
|
||||||
|
### Présentations LSD, JdS et ML@Aussois
|
||||||
|
|
||||||
|
- ~~PRÉSENTATION JDS (LSD), durée introuvable, adapter en anglais les slides~~ et voir avec PB et SD.
|
||||||
|
- Quel plan ?
|
||||||
|
- Quels résultats ? Baldock, Traveset ... (sub-Doré)
|
||||||
|
- Pas la peine de préciser l'algo de clustering
|
||||||
|
- Indiquer sur une slide le problème de support pour $\pi\rho$ à faire s'il y a
|
||||||
|
le temps.
|
||||||
|
- Résultats sur les réseaux Baldock, regarder le positionnement par bloc des
|
||||||
|
espèces communes, regarder les probas d'appartenance aux blocs par espèces
|
||||||
|
communes et par réseau.
|
||||||
|
|
||||||
|
### Applications
|
||||||
|
|
||||||
|
- Kmeans sur la densité des réseaux subdoré pour pré-partitionner et *clusteriser*.
|
||||||
|
Car densités déséquilibrées.
|
||||||
|
|
||||||
|
:::{#ref-kmeans-vae}
|
||||||
|
|
||||||
|
- Faire GNN-VAE Doré et sub-Doré avec kmeans et clustering sur l'espace latent
|
||||||
|
J'ai commencé à regarder un peu
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
|
### Inférence et microbes
|
||||||
|
|
||||||
|
- Lancer *colBiSBM* sur $OTU\times Sample$
|
||||||
|
- Se renseigner techniques d'inférence de réseaux :
|
||||||
|
- covariance (base corrélation et seuil)
|
||||||
|
- GraphicalLASSO
|
||||||
|
- Co-occurence
|
||||||
|
- Lancer *colSBM* sur $OTU\times OTU$
|
||||||
|
- Creuser [TabNet](https://raw.githubusercontent.com/cregouby/R-toulouse-tabnet/main/Tabnet_RR2023_fr_pdf.pdf) de Christophe Regouby et les [exercices](https://github.com/cregouby/Tutoriel_torch)
|
||||||
|
- Regarder **SPARTA** Rennes
|
||||||
|
- Lire Papiers compositional data (Aitchison et al. intro)
|
||||||
|
- Lire article multi-niveaux Saint-Clair
|
||||||
|
|
||||||
|
## A discuter
|
||||||
|
|
||||||
|
## A faire
|
||||||
|
|
||||||
|
### Inférence
|
||||||
|
|
||||||
|
- Papier pour comprendre données
|
||||||
|
- ~~Faust et al.~~
|
||||||
|
- Abdill et al.
|
||||||
|
- Bashan et al.
|
||||||
|
|
||||||
|
- pbs : variance, bcp de zero, covariables, offset et taxonomie (Reseaux arretes differents niveaux : Genre, OTU ...)
|
||||||
|
|
||||||
|
> Combine networks at different taxonomic levels
|
||||||
|
|
||||||
|
- Inférence + GREMLINS
|
||||||
|
|
||||||
|
### Rédaction article
|
||||||
|
|
||||||
|
- Relire intro St Clair
|
||||||
|
- S'inspirer structure pour mon intro
|
||||||
|
- Trouver biblio intro
|
||||||
|
- Rédiger l'intro
|
||||||
|
- Dire résultats nettement meilleurs et variabilités inférieures.
|
||||||
|
|
||||||
|
|
||||||
|
## J'ai fait
|
||||||
|
|
||||||
|
- Dé-bugger les simulations :
|
||||||
|
|
||||||
|
- Clustering : Relancer simulations de clustering avec $M = 30$ où $M_i = 10, \forall i$. En attente retour MIGALE
|
||||||
|
Relancer simus clustering avec VEM steps = 10 000 et plus nombreux init pour spectral. Ajouter simu clustering métriques nb sous-collections obtenues.
|
||||||
|
Vérifier les résultats obtenus si ARI = 0. Et augmenter la taille $M = 30$ avec $M_1 = M_2 = M_3 = 10$. ~~-> BUG, dois creuser mais juste des problèmes techniques.~~
|
||||||
|
Le bug venait probablement d'une inadéquation entre la version de *future* et *future.callr*, les résultats temporaires sont encourageants.
|
||||||
|
**J'ai mis les résultats dans l'article**.
|
||||||
|
|
||||||
|
### Présentations LSD, JdS et ML@Aussois
|
||||||
|
|
||||||
|
- ~~PRÉSENTATION JDS (LSD), durée introuvable, adapter en anglais les slides~~ et voir avec PB et SD.
|
||||||
|
- Quel plan ?
|
||||||
|
- Quels résultats ? Baldock, Traveset ... (sub-Doré)
|
||||||
|
|
||||||
|
- Mettre le détail des formules et des algos pour VE et sélection de modèle en
|
||||||
|
annexe.
|
||||||
|
- Préciser simplement que l'on utilise un algo VE et un critère type BIC.
|
||||||
|
|
||||||
|
### VGAE
|
||||||
|
|
||||||
|
- ~~Dé-bugger pourquoi `BipartiteInnerProductDecoder.forward() -> NaN`~~ ->
|
||||||
|
**C'était parce que les features en entrée n'étaient pas normalisées par les
|
||||||
|
couches de convolutions**. Les meilleurs résultats d'AUC et de précisions que
|
||||||
|
j'obtiens par VGAE sont autour de 0.80.
|
||||||
|
|
||||||
|
### Inférence et microbes
|
||||||
|
|
||||||
|
- Human Gut Compendium : téléchargé et préparé les données. Mises au format
|
||||||
|
`edgelist` et liste de matrices et extrait les infos supplémentaires.
|
||||||
|
→ trop lourd en RAM pour tourner sur machine perso (optim colSBM...)
|
||||||
|
## A continuer
|
||||||
|
|
||||||
|
### Applications
|
||||||
|
|
||||||
|
- Idée Sophie: Regarder clustering de données plantes-pollinisateur selon gradient d'urbanisation
|
||||||
|
|
||||||
|
> Sophie a fait une appli qui marche bien et va dans le sens de l'analyse faite
|
||||||
|
(à savoir pas d'effet du gradient d'urbanisation). À continuer pour l'intégrer dans l'article !
|
||||||
|
|
||||||
|
### Axe inférence
|
||||||
|
|
||||||
|
- Lire biblio fournie Julie, Inférence de réseaux : co-occurence
|
||||||
|
|
||||||
|
> J'ai lu Faust et al.
|
||||||
|
> Je lis Abdill et al.
|
||||||
|
|
||||||
|
## Repoussés ou abandonnés
|
||||||
|
:::{.callout-note collapse="true"}
|
||||||
|
## Déplier pour voir
|
||||||
|
- Résultats simus NA **Erreur pour certaines conditions** : Pour NA robustness générer `nb_rep` collections de taille $M=2$ et prélever
|
||||||
|
$\epsilon_{max}n_r n_c$ liens à retirer puis pour les $\epsilon < \epsilon_{max}$ prélever dans la liste des indices afin d'avoir des perturbations emboitées.
|
||||||
|
Il faut que j'ajoute un mécanisme pour reprendre des conditions qui ont planté et que je skip dans le future_lapply les conditions déjà traitées (pour avoir la même seed quand je vais exécuter le code).
|
||||||
|
Implémenté les missing steps.
|
||||||
|
|
||||||
|
> Je n'arrive pas à comprendre les erreurs qui arrivent
|
||||||
|
|
||||||
|
- Lire Biological Networks - François Képès
|
||||||
|
|
||||||
|
- Regarder les applications pour les collections de réseaux recommender system *Pas pertinents et trop gros*
|
||||||
|
|
||||||
|
:::{#lst-reco-systems lst-cap="Recommender systems data"}
|
||||||
|
Par exemple :
|
||||||
|
|
||||||
|
- [Liste de recommendation data](https://cseweb.ucsd.edu/~jmcauley/datasets.html)
|
||||||
|
:::
|
||||||
|
|
||||||
|
|
||||||
|
### Papier plus multi-applications
|
||||||
|
- Données d'Elisa herbivore ?
|
||||||
|
- Données urbanisations ?
|
||||||
|
|
||||||
|
### Autour de l'article et du package
|
||||||
|
|
||||||
|
- Créer des vignettes illustrant par exemple des cas de simulations. **Possible de mettre l'exemple d'application de Sophie sur les réseaux avec gradient d'urbanisation**.
|
||||||
|
|
||||||
|
### Simulations article
|
||||||
|
|
||||||
|
- Comparer sur clustering unipartite avec des versions symétrisées par blocs des matrices d'adjacence.
|
||||||
|
|
||||||
|
- Corriger structure de simus :
|
||||||
|
- Pour noisy $\alpha$ :
|
||||||
|
- Logit pour envoyer la gaussienne vers (0,1)
|
||||||
|
- Beta contrainte dans (0,1)
|
||||||
|
- Pour noisy links : Générer `nb_clustering` collections de taille M puis prélever $\epsilon_{max}n_r n_c$ liens à inverser puis pour les $\epsilon < \epsilon_{max}$ prélever dans la liste des indices afin d'avoir des perturbations emboitées.
|
||||||
|
:::
|
||||||
154
suivi/2025-21/2025-21.qmd
Normal file
|
|
@ -0,0 +1,154 @@
|
||||||
|
---
|
||||||
|
title: "Bilan semaine 21 2025 : 19 mai - 23 mai"
|
||||||
|
categories: [colBiSBM, inférence, GNN]
|
||||||
|
date: 2025 05 23
|
||||||
|
---
|
||||||
|
|
||||||
|
## TOP PRIORITÉ
|
||||||
|
|
||||||
|
- ✅ Corriger pour les simus dans l'article : écrire $N = \#\text{ de répétitions}$
|
||||||
|
- Pour clustering de collections sur données réelles :
|
||||||
|
→ L'intuition de Pierre semble être confirmée, les dissimilarités semblent arrêter de varier sensiblement pour de grandes valeurs $(Q_1,Q_2)$.
|
||||||
|
- Faire le `hclust` avec diverses distances et voir si les coupes proposées diffèrent sensiblement
|
||||||
|
- Si plusieurs clustering possibles les tester et sélectionner le
|
||||||
|
meilleur
|
||||||
|
- Ré-ajuster les bonnes partitions.
|
||||||
|
- Idée de Sophie : alterner descendant et ascendant → prometteur aussi
|
||||||
|
- Pour les deux propositions données simulées tester diverses distances.
|
||||||
|
|
||||||
|
|
||||||
|
- Dé-bugger les simulations :
|
||||||
|
|
||||||
|
- Inférence : Relancer simus d'inférence avec n = 240 pour voir si la qualité augmente (se rassurer). En fait on est déjà à 240, j'ai relancé avec M = 4 au lieu de M = 2.
|
||||||
|
En attente résultats MIGALE -> BUG, dois creuser mais juste des problèmes techniques -> Visiblement il y a d'autres problèmes que juste le plan de parallélisation.
|
||||||
|
|
||||||
|
- Vérifier si problème de version tidyverse pour vapply sur l'**inférence**.
|
||||||
|
|
||||||
|
- Si problème de parallélisation vient de pb de version *future.callr* le signaler à MIGALE.
|
||||||
|
|
||||||
|
### Présentations LSD, JdS et ML@Aussois
|
||||||
|
|
||||||
|
- ✅ A l'oral pourquoi des réseaux : car de plus en plus disponibles et idée derrière, la structure fonctionnelle permet de comprendre les caractéristiques de l'écosystème décrit
|
||||||
|
- ✅ Chercher des réfs pour les méthodes (Hoff Latent Position Model, Nowicki pour LBM, une review pour les métriques voir thèses St Clair et Emré)
|
||||||
|
- ✅ Sur slide méthodes différencier métriques micro et macro et dire oralement que nous méso
|
||||||
|
- ❎ Indiquer sur une slide le problème de support pour $\pi\rho$ à faire s'il y a
|
||||||
|
le temps.
|
||||||
|
- ✅ Résultats sur les réseaux Baldock, regarder le positionnement par bloc des
|
||||||
|
espèces communes, regarder les probas d'appartenance aux blocs par espèces
|
||||||
|
communes et par réseau ➡️ Bourdons
|
||||||
|
- ✅ Intégrer les retours de Sophie
|
||||||
|
- Attente retours Pierre
|
||||||
|
|
||||||
|
### Applications
|
||||||
|
|
||||||
|
- Kmeans sur la densité des réseaux subdoré pour pré-partitionner et *clusteriser*.
|
||||||
|
Car densités déséquilibrées.
|
||||||
|
|
||||||
|
:::{#ref-kmeans-vae}
|
||||||
|
|
||||||
|
- Faire GNN-VAE Doré et sub-Doré avec kmeans et clustering sur l'espace latent
|
||||||
|
J'ai commencé à regarder un peu
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
|
- Comparer les perfs du VAE sur Baldock avec colBiSBM par exemple
|
||||||
|
|
||||||
|
### Inférence et microbes
|
||||||
|
|
||||||
|
- Lancer *colBiSBM* sur $OTU\times Sample$ → problème du chargement en mémoire des données à voir
|
||||||
|
- Se renseigner techniques d'inférence de réseaux :
|
||||||
|
- covariance (base corrélation et seuil)
|
||||||
|
- GraphicalLASSO
|
||||||
|
- Co-occurence
|
||||||
|
- Lancer *colSBM* sur $OTU\times OTU$
|
||||||
|
- Creuser [TabNet](https://raw.githubusercontent.com/cregouby/R-toulouse-tabnet/main/Tabnet_RR2023_fr_pdf.pdf) de Christophe Regouby et les [exercices](https://github.com/cregouby/Tutoriel_torch)
|
||||||
|
- Regarder **SPARTA** Rennes
|
||||||
|
- Lire Papiers compositional data (Aitchison et al. intro)
|
||||||
|
- Lire article multi-niveaux Saint-Clair
|
||||||
|
- Demander à JA si elle connaît des réseaux d'interactions connus par les experts (idée d'intégrer une connaissance experte et de voir les différences de structure par rapport à celle attendue)
|
||||||
|
- Ecrire et étudier les modèles pour différents niveaux taxonomiques.
|
||||||
|
\begin{align*}
|
||||||
|
i \rightarrow &~N^1_i \subseteq N^2_i \subseteq N^3_i & \text{Taxonomie}\\
|
||||||
|
Z^0_i \overset{?}{=} & Z^1_i \overset{?}{=} Z^2_i \overset{?}{=} Z^3_i & \text{Groupes fonctionnels}
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
## A discuter
|
||||||
|
|
||||||
|
## A faire
|
||||||
|
|
||||||
|
### Inférence
|
||||||
|
|
||||||
|
- Papier pour comprendre données
|
||||||
|
- ~~Faust et al.~~
|
||||||
|
- Abdill et al.
|
||||||
|
- Bashan et al.
|
||||||
|
|
||||||
|
- pbs : variance, bcp de zero, covariables, offset et taxonomie (Reseaux arretes differents niveaux : Genre, OTU ...)
|
||||||
|
|
||||||
|
> Combine networks at different taxonomic levels
|
||||||
|
|
||||||
|
- Inférence + GREMLINS
|
||||||
|
|
||||||
|
### Rédaction article
|
||||||
|
|
||||||
|
- Relire intro St Clair
|
||||||
|
- S'inspirer structure pour mon intro
|
||||||
|
- Trouver biblio intro
|
||||||
|
- Rédiger l'intro
|
||||||
|
- Dire résultats nettement meilleurs et variabilités inférieures.
|
||||||
|
|
||||||
|
## A continuer
|
||||||
|
|
||||||
|
### Applications
|
||||||
|
|
||||||
|
- Idée Sophie: Regarder clustering de données plantes-pollinisateur selon gradient d'urbanisation
|
||||||
|
|
||||||
|
> Sophie a fait une appli qui marche bien et va dans le sens de l'analyse faite
|
||||||
|
(à savoir pas d'effet du gradient d'urbanisation). À continuer pour l'intégrer dans l'article !
|
||||||
|
|
||||||
|
### Axe inférence
|
||||||
|
|
||||||
|
- Lire biblio fournie Julie, Inférence de réseaux : co-occurence
|
||||||
|
|
||||||
|
> J'ai lu Faust et al.
|
||||||
|
> Je lis Abdill et al.
|
||||||
|
|
||||||
|
## Repoussés ou abandonnés
|
||||||
|
:::{.callout-note collapse="true"}
|
||||||
|
## Déplier pour voir
|
||||||
|
- Résultats simus NA **Erreur pour certaines conditions** : Pour NA robustness générer `nb_rep` collections de taille $M=2$ et prélever
|
||||||
|
$\epsilon_{max}n_r n_c$ liens à retirer puis pour les $\epsilon < \epsilon_{max}$ prélever dans la liste des indices afin d'avoir des perturbations emboitées.
|
||||||
|
Il faut que j'ajoute un mécanisme pour reprendre des conditions qui ont planté et que je skip dans le future_lapply les conditions déjà traitées (pour avoir la même seed quand je vais exécuter le code).
|
||||||
|
Implémenté les missing steps.
|
||||||
|
|
||||||
|
> Je n'arrive pas à comprendre les erreurs qui arrivent
|
||||||
|
|
||||||
|
- Lire Biological Networks - François Képès
|
||||||
|
|
||||||
|
- Regarder les applications pour les collections de réseaux recommender system *Pas pertinents et trop gros*
|
||||||
|
|
||||||
|
:::{#lst-reco-systems lst-cap="Recommender systems data"}
|
||||||
|
Par exemple :
|
||||||
|
|
||||||
|
- [Liste de recommendation data](https://cseweb.ucsd.edu/~jmcauley/datasets.html)
|
||||||
|
:::
|
||||||
|
|
||||||
|
|
||||||
|
### Papier plus multi-applications
|
||||||
|
- Données d'Elisa herbivore ?
|
||||||
|
- Données urbanisations ?
|
||||||
|
|
||||||
|
### Autour de l'article et du package
|
||||||
|
|
||||||
|
- Créer des vignettes illustrant par exemple des cas de simulations. **Possible de mettre l'exemple d'application de Sophie sur les réseaux avec gradient d'urbanisation**.
|
||||||
|
|
||||||
|
### Simulations article
|
||||||
|
|
||||||
|
- Comparer sur clustering unipartite avec des versions symétrisées par blocs des matrices d'adjacence.
|
||||||
|
|
||||||
|
- Corriger structure de simus :
|
||||||
|
- Pour noisy $\alpha$ :
|
||||||
|
- Logit pour envoyer la gaussienne vers (0,1)
|
||||||
|
- Beta contrainte dans (0,1)
|
||||||
|
- Pour noisy links : Générer `nb_clustering` collections de taille M puis prélever $\epsilon_{max}n_r n_c$ liens à inverser puis pour les $\epsilon < \epsilon_{max}$ prélever dans la liste des indices afin d'avoir des perturbations emboitées.
|
||||||
|
:::
|
||||||
146
suivi/2025-22/2025-22.qmd
Normal file
|
|
@ -0,0 +1,146 @@
|
||||||
|
---
|
||||||
|
title: "Bilan semaine 22 2025 : 26 mai - 30 mai"
|
||||||
|
categories: [colBiSBM, inférence, GNN]
|
||||||
|
date: 2025-05-28
|
||||||
|
---
|
||||||
|
|
||||||
|
## TOP PRIORITÉ
|
||||||
|
|
||||||
|
- Pour clustering de collections sur données réelles :
|
||||||
|
→ L'intuition de Pierre semble être confirmée, les dissimilarités semblent arrêter de varier sensiblement pour de grandes valeurs $(Q_1,Q_2)$.
|
||||||
|
- Faire le `hclust` avec diverses distances et voir si les coupes proposées diffèrent sensiblement
|
||||||
|
- Si plusieurs clustering possibles les tester et sélectionner le
|
||||||
|
meilleur
|
||||||
|
- Ré-ajuster les bonnes partitions.
|
||||||
|
- Idée de Sophie : alterner descendant et ascendant → prometteur aussi
|
||||||
|
- Pour les deux propositions données simulées tester diverses distances.
|
||||||
|
|
||||||
|
|
||||||
|
- Dé-bugger les simulations :
|
||||||
|
|
||||||
|
- Inférence : Relancer simus d'inférence avec n = 240 pour voir si la qualité augmente (se rassurer). En fait on est déjà à 240, j'ai relancé avec M = 4 au lieu de M = 2.
|
||||||
|
En attente résultats MIGALE -> BUG, dois creuser mais juste des problèmes techniques -> Visiblement il y a d'autres problèmes que juste le plan de parallélisation.
|
||||||
|
|
||||||
|
- Vérifier si problème de version tidyverse pour vapply sur l'**inférence**.
|
||||||
|
|
||||||
|
- Si problème de parallélisation vient de pb de version *future.callr* le signaler à MIGALE.
|
||||||
|
|
||||||
|
### Présentations LSD, JdS et ML@Aussois
|
||||||
|
|
||||||
|
- Attente retours Pierre
|
||||||
|
|
||||||
|
|
||||||
|
### Applications
|
||||||
|
|
||||||
|
- Kmeans sur la densité des réseaux subdoré pour pré-partitionner et *clusteriser*.
|
||||||
|
Car densités déséquilibrées.
|
||||||
|
|
||||||
|
:::{#ref-kmeans-vae}
|
||||||
|
|
||||||
|
- Faire GNN-VAE Doré et sub-Doré avec kmeans et clustering sur l'espace latent
|
||||||
|
J'ai commencé à regarder un peu
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
|
- Comparer les perfs du VAE sur Baldock avec colBiSBM par exemple
|
||||||
|
|
||||||
|
### Inférence et microbes
|
||||||
|
|
||||||
|
- Lancer *colBiSBM* sur $OTU\times Sample$ → problème du chargement en mémoire des données à voir
|
||||||
|
- Se renseigner techniques d'inférence de réseaux :
|
||||||
|
- covariance (base corrélation et seuil)
|
||||||
|
- GraphicalLASSO
|
||||||
|
- Co-occurrence
|
||||||
|
- Lancer *colSBM* sur $OTU\times OTU$
|
||||||
|
- Creuser [TabNet](https://raw.githubusercontent.com/cregouby/R-toulouse-tabnet/main/Tabnet_RR2023_fr_pdf.pdf) de Christophe Regouby et les [exercices](https://github.com/cregouby/Tutoriel_torch)
|
||||||
|
- Regarder **SPARTA** Rennes
|
||||||
|
- Lire Papiers compositional data (Aitchison et al. intro)
|
||||||
|
- Lire article multi-niveaux Saint-Clair
|
||||||
|
- Demander à JA si elle connaît des réseaux d'interactions connus par les experts (idée d'intégrer une connaissance experte et de voir les différences de structure par rapport à celle attendue)
|
||||||
|
- Ecrire et étudier les modèles pour différents niveaux taxonomiques.
|
||||||
|
\begin{align*}
|
||||||
|
i \rightarrow &~N^1_i \subseteq N^2_i \subseteq N^3_i & \text{Taxonomie}\\
|
||||||
|
Z^0_i \overset{?}{=} & Z^1_i \overset{?}{=} Z^2_i \overset{?}{=} Z^3_i & \text{Groupes fonctionnels}
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
## A discuter
|
||||||
|
|
||||||
|
## A faire
|
||||||
|
|
||||||
|
### Inférence
|
||||||
|
|
||||||
|
- Papier pour comprendre données
|
||||||
|
- ~~Faust et al.~~
|
||||||
|
- Abdill et al.
|
||||||
|
- Bashan et al.
|
||||||
|
|
||||||
|
- pbs : variance, bcp de zero, covariables, offset et taxonomie (Reseaux arretes differents niveaux : Genre, OTU ...)
|
||||||
|
|
||||||
|
> Combine networks at different taxonomic levels
|
||||||
|
|
||||||
|
- Inférence + GREMLINS
|
||||||
|
|
||||||
|
### Rédaction article
|
||||||
|
|
||||||
|
- Relire intro St Clair
|
||||||
|
- S'inspirer structure pour mon intro
|
||||||
|
- Trouver biblio intro
|
||||||
|
- Rédiger l'intro
|
||||||
|
- Dire résultats nettement meilleurs et variabilités inférieures.
|
||||||
|
|
||||||
|
|
||||||
|
## A continuer
|
||||||
|
|
||||||
|
### Applications
|
||||||
|
|
||||||
|
- Idée Sophie: Regarder clustering de données plantes-pollinisateur selon gradient d'urbanisation
|
||||||
|
|
||||||
|
> Sophie a fait une appli qui marche bien et va dans le sens de l'analyse faite
|
||||||
|
(à savoir pas d'effet du gradient d'urbanisation). À continuer pour l'intégrer dans l'article !
|
||||||
|
|
||||||
|
### Axe inférence
|
||||||
|
|
||||||
|
- Lire biblio fournie Julie, Inférence de réseaux : co-occurrence
|
||||||
|
|
||||||
|
> J'ai lu Faust et al.
|
||||||
|
> Je lis Abdill et al.
|
||||||
|
|
||||||
|
## Repoussés ou abandonnés
|
||||||
|
:::{.callout-note collapse="true"}
|
||||||
|
## Déplier pour voir
|
||||||
|
- Résultats simus NA **Erreur pour certaines conditions** : Pour NA robustness générer `nb_rep` collections de taille $M=2$ et prélever
|
||||||
|
$\epsilon_{max}n_r n_c$ liens à retirer puis pour les $\epsilon < \epsilon_{max}$ prélever dans la liste des indices afin d'avoir des perturbations emboitées.
|
||||||
|
Il faut que j'ajoute un mécanisme pour reprendre des conditions qui ont planté et que je skip dans le future_lapply les conditions déjà traitées (pour avoir la même seed quand je vais exécuter le code).
|
||||||
|
Implémenté les missing steps.
|
||||||
|
|
||||||
|
> Je n'arrive pas à comprendre les erreurs qui arrivent
|
||||||
|
|
||||||
|
- Lire Biological Networks - François Képès
|
||||||
|
|
||||||
|
- Regarder les applications pour les collections de réseaux recommender system *Pas pertinents et trop gros*
|
||||||
|
|
||||||
|
:::{#lst-reco-systems lst-cap="Recommender systems data"}
|
||||||
|
Par exemple :
|
||||||
|
|
||||||
|
- [Liste de recommendation data](https://cseweb.ucsd.edu/~jmcauley/datasets.html)
|
||||||
|
:::
|
||||||
|
|
||||||
|
|
||||||
|
### Papier plus multi-applications
|
||||||
|
- Données d'Elisa herbivore ?
|
||||||
|
- Données urbanisations ?
|
||||||
|
|
||||||
|
### Autour de l'article et du package
|
||||||
|
|
||||||
|
- Créer des vignettes illustrant par exemple des cas de simulations. **Possible de mettre l'exemple d'application de Sophie sur les réseaux avec gradient d'urbanisation**.
|
||||||
|
|
||||||
|
### Simulations article
|
||||||
|
|
||||||
|
- Comparer sur clustering unipartite avec des versions symétrisées par blocs des matrices d'adjacence.
|
||||||
|
|
||||||
|
- Corriger structure de simus :
|
||||||
|
- Pour noisy $\alpha$ :
|
||||||
|
- Logit pour envoyer la gaussienne vers (0,1)
|
||||||
|
- Beta contrainte dans (0,1)
|
||||||
|
- Pour noisy links : Générer `nb_clustering` collections de taille M puis prélever $\epsilon_{max}n_r n_c$ liens à inverser puis pour les $\epsilon < \epsilon_{max}$ prélever dans la liste des indices afin d'avoir des perturbations emboitées.
|
||||||
|
:::
|
||||||
164
suivi/2025-24/2025-24.qmd
Normal file
|
|
@ -0,0 +1,164 @@
|
||||||
|
---
|
||||||
|
title: "Bilan semaine 24 2025 : 10 juin - 13 juin"
|
||||||
|
categories: [colBiSBM, inférence, GNN]
|
||||||
|
date: 2025-06-13
|
||||||
|
bibliography: references.bib
|
||||||
|
---
|
||||||
|
|
||||||
|
## TODO List
|
||||||
|
- ✅ Préparer la séance intro à Git pour le 13 juin. **La séance s'est très bien passée**
|
||||||
|
- Pour clustering de collections sur données réelles :
|
||||||
|
→ L'intuition de Pierre semble être confirmée, les dissimilarités semblent arrêter de varier sensiblement pour de grandes valeurs $(Q_1,Q_2)$.
|
||||||
|
- Faire le `hclust` avec diverses distances et voir si les coupes proposées diffèrent sensiblement
|
||||||
|
- Si plusieurs clustering possibles les tester et sélectionner le
|
||||||
|
meilleur
|
||||||
|
- Ré-ajuster les bonnes partitions.
|
||||||
|
- ✅ C'est bon j'ai une fonction qui tourne, mais lentement ⌛
|
||||||
|
- ⏳Simulations en train de tourner
|
||||||
|
- ❗L'approche que j'ai en mettant la pénalité à 0 peut favoriser de séparer trop les réseaux et donc il faudrait refusionner.
|
||||||
|
➡️ mais le d&a ne fonctionne qu'en *iid*
|
||||||
|
- ✅ Idée de Sophie : alterner descendant et ascendant → prometteur aussi. J'ai codé le fichier de simulations et débuggué le vecteur de clustering ▶️ à voir les performances. ➡️ la simu à 9 réseaux (bcp de variabilité a priori) est lancée, en attente des résultats ➡️ Je tombe sur un bug déjà rencontré dans les simus d'inférence. J'ai lancé sans parallélisation pour essayer de comprendre le bug.
|
||||||
|
|
||||||
|
|
||||||
|
✅ Il y avait un bug dans la fenêtre glissante où la condition d'arrêt quand le BICL n'augmentait plus était mal détectée. Corrigé.
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
:::{layout-ncol="2"}
|
||||||
|

|
||||||
|
|
||||||
|

|
||||||
|
:::
|
||||||
|
|
||||||
|
- Pour les deux propositions données simulées tester diverses distances.
|
||||||
|
|
||||||
|
|
||||||
|
- Dé-bugger les simulations :
|
||||||
|
|
||||||
|
- Inférence : Relancer simus d'inférence avec n = 240 pour voir si la qualité augmente (se rassurer). En fait on est déjà à 240, j'ai relancé avec M = 4 au lieu de M = 2.
|
||||||
|
En attente résultats MIGALE -> BUG, dois creuser mais juste des problèmes techniques -> Visiblement il y a d'autres problèmes que juste le plan de parallélisation.
|
||||||
|
|
||||||
|
- Vérifier si problème de version tidyverse pour vapply sur l'**inférence**.
|
||||||
|
|
||||||
|
- Si problème de parallélisation vient de pb de version *future.callr* le signaler à MIGALE.
|
||||||
|
|
||||||
|
- ✅ Réparé mauvais placement des légendes, des valeurs etc.
|
||||||
|
|
||||||
|
### Applications
|
||||||
|
|
||||||
|
- Kmeans sur la densité des réseaux subdoré pour pré-partitionner et *clusteriser*.
|
||||||
|
Car densités déséquilibrées.
|
||||||
|
|
||||||
|
:::{#ref-kmeans-vae}
|
||||||
|
|
||||||
|
- Faire GNN-VAE Doré et sub-Doré avec kmeans et clustering sur l'espace latent
|
||||||
|
J'ai commencé à regarder un peu
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
|
- Comparer les perfs du VAE sur Baldock avec colBiSBM par exemple
|
||||||
|
|
||||||
|
### Inférence et microbes
|
||||||
|
|
||||||
|
- Lancer *colBiSBM* sur $OTU\times Sample$ → problème du chargement en mémoire des données à voir
|
||||||
|
- Se renseigner techniques d'inférence de réseaux :
|
||||||
|
- covariance (base corrélation et seuil)
|
||||||
|
- GraphicalLASSO
|
||||||
|
- Co-occurrence
|
||||||
|
- Lancer *colSBM* sur $OTU\times OTU$
|
||||||
|
- Creuser [TabNet](https://raw.githubusercontent.com/cregouby/R-toulouse-tabnet/main/Tabnet_RR2023_fr_pdf.pdf) de Christophe Regouby et les [exercices](https://github.com/cregouby/Tutoriel_torch)
|
||||||
|
- Regarder **SPARTA** Rennes
|
||||||
|
- Lire Papiers compositional data (Aitchison et al. intro)
|
||||||
|
- Lire article multi-niveaux Saint-Clair
|
||||||
|
- Demander à JA si elle connaît des réseaux d'interactions connus par les experts (idée d'intégrer une connaissance experte et de voir les différences de structure par rapport à celle attendue)
|
||||||
|
- Ecrire et étudier les modèles pour différents niveaux taxonomiques.
|
||||||
|
\begin{align*}
|
||||||
|
i \rightarrow &~N^1_i \subseteq N^2_i \subseteq N^3_i & \text{Taxonomie}\\
|
||||||
|
Z^0_i \overset{?}{=} & Z^1_i \overset{?}{=} Z^2_i \overset{?}{=} Z^3_i & \text{Groupes fonctionnels}
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
## Lecture en cours
|
||||||
|
|
||||||
|
### OT
|
||||||
|
- @mazeletUnsupervisedLearningOptimal Intéressant pour le transport optimal entre graphes de tailles différentes
|
||||||
|
- @nennaLecture2Entropic Pour comprendre le problème d'OT régularisé pour l'entropie.
|
||||||
|
|
||||||
|
## A discuter
|
||||||
|
|
||||||
|
### Inférence
|
||||||
|
|
||||||
|
- Papier pour comprendre données
|
||||||
|
- ~~Faust et al.~~
|
||||||
|
- Abdill et al.
|
||||||
|
- Bashan et al.
|
||||||
|
|
||||||
|
- pbs : variance, bcp de zero, covariables, offset et taxonomie (Reseaux arretes differents niveaux : Genre, OTU ...)
|
||||||
|
|
||||||
|
> Combine networks at different taxonomic levels
|
||||||
|
|
||||||
|
- Inférence + GREMLINS
|
||||||
|
|
||||||
|
### Rédaction article
|
||||||
|
|
||||||
|
- Relire intro St Clair
|
||||||
|
- S'inspirer structure pour mon intro
|
||||||
|
- Trouver biblio intro
|
||||||
|
- Rédiger l'intro
|
||||||
|
- Dire résultats nettement meilleurs et variabilités inférieures.
|
||||||
|
|
||||||
|
|
||||||
|
## A continuer
|
||||||
|
|
||||||
|
### Applications
|
||||||
|
|
||||||
|
- Idée Sophie: Regarder clustering de données plantes-pollinisateur selon gradient d'urbanisation
|
||||||
|
|
||||||
|
> Sophie a fait une appli qui marche bien et va dans le sens de l'analyse faite
|
||||||
|
(à savoir pas d'effet du gradient d'urbanisation). À continuer pour l'intégrer dans l'article !
|
||||||
|
|
||||||
|
### Axe inférence
|
||||||
|
|
||||||
|
- Lire biblio fournie Julie, Inférence de réseaux : co-occurrence
|
||||||
|
|
||||||
|
> J'ai lu Faust et al.
|
||||||
|
> Je lis Abdill et al.
|
||||||
|
|
||||||
|
## Repoussés ou abandonnés
|
||||||
|
:::{.callout-note collapse="true"}
|
||||||
|
## Déplier pour voir
|
||||||
|
- Résultats simus NA **Erreur pour certaines conditions** : Pour NA robustness générer `nb_rep` collections de taille $M=2$ et prélever
|
||||||
|
$\epsilon_{max}n_r n_c$ liens à retirer puis pour les $\epsilon < \epsilon_{max}$ prélever dans la liste des indices afin d'avoir des perturbations emboitées.
|
||||||
|
Il faut que j'ajoute un mécanisme pour reprendre des conditions qui ont planté et que je skip dans le future_lapply les conditions déjà traitées (pour avoir la même seed quand je vais exécuter le code).
|
||||||
|
Implémenté les missing steps.
|
||||||
|
|
||||||
|
> Je n'arrive pas à comprendre les erreurs qui arrivent
|
||||||
|
|
||||||
|
- Lire Biological Networks - François Képès
|
||||||
|
|
||||||
|
- Regarder les applications pour les collections de réseaux recommender system *Pas pertinents et trop gros*
|
||||||
|
|
||||||
|
:::{#lst-reco-systems lst-cap="Recommender systems data"}
|
||||||
|
Par exemple :
|
||||||
|
|
||||||
|
- [Liste de recommendation data](https://cseweb.ucsd.edu/~jmcauley/datasets.html)
|
||||||
|
:::
|
||||||
|
|
||||||
|
|
||||||
|
### Papier plus multi-applications
|
||||||
|
- Données d'Elisa herbivore ?
|
||||||
|
- Données urbanisations ?
|
||||||
|
|
||||||
|
### Autour de l'article et du package
|
||||||
|
|
||||||
|
- Créer des vignettes illustrant par exemple des cas de simulations. **Possible de mettre l'exemple d'application de Sophie sur les réseaux avec gradient d'urbanisation**.
|
||||||
|
|
||||||
|
### Simulations article
|
||||||
|
|
||||||
|
- Comparer sur clustering unipartite avec des versions symétrisées par blocs des matrices d'adjacence.
|
||||||
|
|
||||||
|
- Corriger structure de simus :
|
||||||
|
- Pour noisy $\alpha$ :
|
||||||
|
- Logit pour envoyer la gaussienne vers (0,1)
|
||||||
|
- Beta contrainte dans (0,1)
|
||||||
|
- Pour noisy links : Générer `nb_clustering` collections de taille M puis prélever $\epsilon_{max}n_r n_c$ liens à inverser puis pour les $\epsilon < \epsilon_{max}$ prélever dans la liste des indices afin d'avoir des perturbations emboitées.
|
||||||
|
:::
|
||||||
BIN
suivi/2025-24/figs/ari-clustering-desc&asc30.png
Normal file
|
After Width: | Height: | Size: 28 KiB |
BIN
suivi/2025-24/figs/ari-clustering-desc&asc9.png
Normal file
|
After Width: | Height: | Size: 22 KiB |
BIN
suivi/2025-24/figs/nbcollections-clustering-descending.png
Normal file
|
After Width: | Height: | Size: 27 KiB |
20
suivi/2025-24/references.bib
Normal file
|
|
@ -0,0 +1,20 @@
|
||||||
|
@article{mazeletUnsupervisedLearningOptimal,
|
||||||
|
title = {Unsupervised {{Learning}} for {{Optimal Transport}} Plan Prediction between Unbalanced Graphs},
|
||||||
|
author = {Mazelet, Sonia and Flamary, Rémi and Thirion, Bertrand},
|
||||||
|
abstract = {Optimal transport between graphs, based on Gromov-Wasserstein and other extensions, is a powerful tool for comparing and aligning graph structures. However, solving the associated non-convex optimization problems is computationally expensive, which limits the scalability of these methods to large graphs. In this work, we present Unbalanced Learning of Optimal Transport (ULOT), a deep learning method that predicts optimal transport plans between two graphs. Our method is trained by minimizing the fused unbalanced Gromov-Wasserstein (FUGW) loss. We propose a novel neural architecture with cross-attention that is conditioned on the FUGW tradeoff hyperparameters. We evaluate ULOT on synthetic stochastic block model (SBM) graphs and on real cortical surface data obtained from fMRI. ULOT predicts transport plans with competitive loss up to two orders of magnitude faster than classical solvers. Furthermore, the predicted plan can be used as a warm start for classical solvers to accelerate their convergence. Finally, the predicted transport plan is fully differentiable with respect to the graph inputs and FUGW hyperparameters, enabling the optimization of functionals of the ULOT plan.},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-11T09:08:09.864Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/HPZEYMM9/Mazelet et al. - Unsupervised Learning for Optimal Transport plan prediction between unbalanced graphs.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{nennaLecture2Entropic,
|
||||||
|
title = {Lecture 2: {{Entropic Optimal Transport}}},
|
||||||
|
author = {Nenna, Luca},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-11T16:06:28.547Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/WGFIISDB/Nenna - Lecture 2 Entropic Optimal Transport.pdf}
|
||||||
|
}
|
||||||
124
suivi/2025-25/2025-25.qmd
Normal file
|
|
@ -0,0 +1,124 @@
|
||||||
|
---
|
||||||
|
title: "Bilan semaine 25 2025 : 16 juin - 20 juin"
|
||||||
|
categories: [colBiSBM, inférence, GNN]
|
||||||
|
date: 2025-06-20
|
||||||
|
bibliography: references.bib
|
||||||
|
---
|
||||||
|
|
||||||
|
## TODO List
|
||||||
|
|
||||||
|
- Pour clustering de collections sur données réelles :
|
||||||
|
→ L'intuition de Pierre semble être confirmée, les dissimilarités semblent arrêter de varier sensiblement pour de grandes valeurs $(Q_1,Q_2)$.
|
||||||
|
- ✅ Si plusieurs clustering possibles les tester et sélectionner le
|
||||||
|
meilleur
|
||||||
|
- ✅ Ré-ajuster les bonnes partitions.
|
||||||
|
- ❓Je n'arrive plus à reproduire le bug pour l'inférence...
|
||||||
|
|
||||||
|
- ✅ **Oui c'est bien le cas** Clustering descendant & ascendant : vérifier qu'au cours du temps le $BICL_{asc} \geq BICL_{desc}$
|
||||||
|
|
||||||
|
- Creuser et explorer avec easy16s !
|
||||||
|
|
||||||
|
- ✅ Comparer les perfs du VAE sur Baldock avec colBiSBM par exemple
|
||||||
|
|
||||||
|
<table>
|
||||||
|
<caption>AUC values for colBiSBM and VGAE models across cities</caption>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th style="empty-cells: hide;border-bottom:hidden;" colspan="1"></th>
|
||||||
|
<th style="border-bottom:hidden;padding-bottom:0; padding-left:3px;padding-right:3px;text-align: center; " colspan="2"><div style="border-bottom: 1px solid #ddd; padding-bottom: 5px; ">AUC</div></th>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<th style="text-align:left;"> City </th>
|
||||||
|
<th style="text-align:right;"> colBiSBM </th>
|
||||||
|
<th style="text-align:right;"> Untuned VGAE </th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:left;"> Bristol </td>
|
||||||
|
<td style="text-align:right;"> 0.798 </td>
|
||||||
|
<td style="text-align:right;"> 0.755 </td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:left;"> Edinburgh </td>
|
||||||
|
<td style="text-align:right;"> 0.836 </td>
|
||||||
|
<td style="text-align:right;"> 0.774 </td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:left;"> Leeds </td>
|
||||||
|
<td style="text-align:right;"> 0.854 </td>
|
||||||
|
<td style="text-align:right;"> 0.760 </td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:left;"> Reading </td>
|
||||||
|
<td style="text-align:right;"> 0.867 </td>
|
||||||
|
<td style="text-align:right;"> 0.740 </td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
- Dé-bugger les simulations :
|
||||||
|
|
||||||
|
- ⌛ Inférence : Relancer simus d'inférence avec n = 240 pour voir si la qualité augmente (se rassurer). En fait on est déjà à 240, j'ai relancé avec M = 4 au lieu de M = 2.
|
||||||
|
En attente résultats MIGALE -> BUG, dois creuser mais juste des problèmes techniques -> Visiblement il y a d'autres problèmes que juste le plan de parallélisation.
|
||||||
|
- ✅ **Non ça n'a pas l'air d'être ça**. Vérifier si problème de version tidyverse pour vapply sur l'**inférence**.
|
||||||
|
- ⌛Bon le bug ne se reproduit plus... les jobs sont juste trop longs (> 120h) j'ai relancé, il ne reste que 182/972 conditions.
|
||||||
|
|
||||||
|
- ✅ **Il suffisait de faire la màj soi-même...** Si problème de parallélisation vient de pb de version *future.callr* le signaler à MIGALE.
|
||||||
|
|
||||||
|
- Kmeans sur la densité des réseaux subdoré pour pré-partitionner et *clusteriser*.
|
||||||
|
Car densités déséquilibrées.
|
||||||
|
|
||||||
|
:::{#ref-kmeans-vae}
|
||||||
|
|
||||||
|
- Faire GNN-VAE Doré et sub-Doré avec kmeans et clustering sur l'espace latent
|
||||||
|
J'ai commencé à regarder un peu
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
|
### Inférence et microbes
|
||||||
|
|
||||||
|
- Lancer *colBiSBM* sur $OTU\times Sample$ → problème du chargement en mémoire des données à voir
|
||||||
|
- Lancer *colSBM* sur $OTU\times OTU$
|
||||||
|
- Creuser [TabNet](https://raw.githubusercontent.com/cregouby/R-toulouse-tabnet/main/Tabnet_RR2023_fr_pdf.pdf) de Christophe Regouby et les [exercices](https://github.com/cregouby/Tutoriel_torch)
|
||||||
|
- Regarder **SPARTA** Rennes
|
||||||
|
- Lire Papiers compositional data (Aitchison et al. intro)
|
||||||
|
- Lire article multi-niveaux Saint-Clair
|
||||||
|
- Demander à JA si elle connaît des réseaux d'interactions connus par les experts (idée d'intégrer une connaissance experte et de voir les différences de structure par rapport à celle attendue)
|
||||||
|
- Ecrire et étudier les modèles pour différents niveaux taxonomiques.
|
||||||
|
\begin{align*}
|
||||||
|
i \rightarrow &~N^1_i \subseteq N^2_i \subseteq N^3_i & \text{Taxonomie}\\
|
||||||
|
Z^0_i \overset{?}{=} & Z^1_i \overset{?}{=} Z^2_i \overset{?}{=} Z^3_i & \text{Groupes fonctionnels}
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
## Lecture en cours
|
||||||
|
|
||||||
|
### OT
|
||||||
|
- ⌛ @mazeletUnsupervisedLearningOptimal Intéressant pour le transport optimal entre graphes de tailles différentes
|
||||||
|
- ⌛ @nennaLecture2Entropic Pour comprendre le problème d'OT régularisé pour l'entropie.
|
||||||
|
- ⌛ @nennaLecture1Monge
|
||||||
|
|
||||||
|
### Inférence de graphes
|
||||||
|
- ✅ @matchadoNetworkAnalysisMethods2021b ➡️ Nos données étant compositionnelles
|
||||||
|
il faut utiliser:
|
||||||
|
- CCLasso et SparCC
|
||||||
|
- HARMONIES pour zéro inflation (Binomiale négative), COZINE centered log ratio transformation compositionnalité, zéro inflation et forte précision
|
||||||
|
- MixMPLN pour générer K réseaux issus de K Poisson log Normal
|
||||||
|
- mLDM peut enlever les arêtes indirectes.
|
||||||
|
- NetCoMi agrège plusieurs méthodes tout en permettant l'analyse différentielle !
|
||||||
|
|
||||||
|
Si pas compositionnelles :
|
||||||
|
|
||||||
|
- Meta-Network pour arêtes indirectes et non linéaires
|
||||||
|
- Environmentally-Driven Edge detection pour corriger les effets de l'environnement
|
||||||
|
|
||||||
|
|
||||||
|
## A discuter
|
||||||
|
|
||||||
|
### Inférence
|
||||||
|
|
||||||
|
- pbs : variance, bcp de zero, covariables, offset et taxonomie (Reseaux arretes differents niveaux : Genre, OTU ...)
|
||||||
|
|
||||||
|
> Combine networks at different taxonomic levels
|
||||||
|
|
||||||
|
- Inférence + GREMLINS
|
||||||
49
suivi/2025-25/references.bib
Normal file
|
|
@ -0,0 +1,49 @@
|
||||||
|
@article{mazeletUnsupervisedLearningOptimal,
|
||||||
|
title = {Unsupervised {{Learning}} for {{Optimal Transport}} Plan Prediction between Unbalanced Graphs},
|
||||||
|
author = {Mazelet, Sonia and Flamary, Rémi and Thirion, Bertrand},
|
||||||
|
abstract = {Optimal transport between graphs, based on Gromov-Wasserstein and other extensions, is a powerful tool for comparing and aligning graph structures. However, solving the associated non-convex optimization problems is computationally expensive, which limits the scalability of these methods to large graphs. In this work, we present Unbalanced Learning of Optimal Transport (ULOT), a deep learning method that predicts optimal transport plans between two graphs. Our method is trained by minimizing the fused unbalanced Gromov-Wasserstein (FUGW) loss. We propose a novel neural architecture with cross-attention that is conditioned on the FUGW tradeoff hyperparameters. We evaluate ULOT on synthetic stochastic block model (SBM) graphs and on real cortical surface data obtained from fMRI. ULOT predicts transport plans with competitive loss up to two orders of magnitude faster than classical solvers. Furthermore, the predicted plan can be used as a warm start for classical solvers to accelerate their convergence. Finally, the predicted transport plan is fully differentiable with respect to the graph inputs and FUGW hyperparameters, enabling the optimization of functionals of the ULOT plan.},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-11T09:08:09.864Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/HPZEYMM9/Mazelet et al. - Unsupervised Learning for Optimal Transport plan prediction between unbalanced graphs.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{nennaLecture2Entropic,
|
||||||
|
title = {Lecture 2: {{Entropic Optimal Transport}}},
|
||||||
|
author = {Nenna, Luca},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-11T16:06:28.547Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/WGFIISDB/Nenna - Lecture 2 Entropic Optimal Transport.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{nennaLecture1Monge,
|
||||||
|
title = {Lecture 1 {{Monge}} and {{Kantorovich}} Problems: From Primal to Dual},
|
||||||
|
author = {Nenna, Luca},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-13T09:24:13.832Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/7LVQPD6D/Nenna - Lecture 1 Monge and Kantorovich problems from primal to dual.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{matchadoNetworkAnalysisMethods2021b,
|
||||||
|
title = {Network Analysis Methods for Studying Microbial Communities: {{A}} Mini Review},
|
||||||
|
shorttitle = {Network Analysis Methods for Studying Microbial Communities},
|
||||||
|
author = {Matchado, Monica Steffi and Lauber, Michael and Reitmeier, Sandra and Kacprowski, Tim and Baumbach, Jan and Haller, Dirk and List, Markus},
|
||||||
|
year = {2021},
|
||||||
|
month = jan,
|
||||||
|
journal = {Computational and Structural Biotechnology Journal},
|
||||||
|
volume = {19},
|
||||||
|
pages = {2687--2698},
|
||||||
|
issn = {2001-0370},
|
||||||
|
doi = {10.1016/j.csbj.2021.05.001},
|
||||||
|
urldate = {2025-06-16},
|
||||||
|
abstract = {Microorganisms including bacteria, fungi, viruses, protists and archaea live as communities in complex and contiguous environments. They engage in numerous inter- and intra- kingdom interactions which can be inferred from microbiome profiling data. In particular, network-based approaches have proven helpful in deciphering complex microbial interaction patterns. Here we give an overview of state-of-the-art methods to infer intra-kingdom interactions ranging from simple correlation- to complex conditional dependence-based methods. We highlight common biases encountered in microbial profiles and discuss mitigation strategies employed by different tools and their trade-off with increased computational complexity. Finally, we discuss current limitations that motivate further method development to infer inter-kingdom interactions and to robustly and comprehensively characterize microbial environments in the future.},
|
||||||
|
keywords = {/unread,Microbial co-occurrence networks,Microbial interactions,Network analysis,Trans-kingdom interactions},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-16T16:18:09.496Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/ZCY74M2I/Matchado et al. - 2021 - Network analysis methods for studying microbial communities A mini review.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/BKZN3MI5/S2001037021001823.html}
|
||||||
|
}
|
||||||
109
suivi/2025-27/2025-27.qmd
Normal file
|
|
@ -0,0 +1,109 @@
|
||||||
|
---
|
||||||
|
title: "Bilan semaine 27 2025 : 30 juin - 4 juillet"
|
||||||
|
categories: [colBiSBM, inférence, GNN]
|
||||||
|
date: 2025-06-30
|
||||||
|
date-modified: last-modified
|
||||||
|
bibliography: references.bib
|
||||||
|
---
|
||||||
|
|
||||||
|
## TODO List
|
||||||
|
|
||||||
|
- Pour clustering de collections sur données réelles :
|
||||||
|
→ L'intuition de Pierre semble être confirmée, les dissimilarités semblent arrêter de varier sensiblement pour de grandes valeurs $(Q_1,Q_2)$.
|
||||||
|
- ❓Je n'arrive plus à reproduire le bug pour l'inférence...
|
||||||
|
- S'assurer que ça marche et relancer
|
||||||
|
|
||||||
|
- Creuser et explorer avec easy16s !
|
||||||
|
|
||||||
|
- ✅ Ajouter le tableau de comparaison du VGAE avec colBiSBM
|
||||||
|
|
||||||
|
- ⌛ **Calcul du score F1** : revérifier que j'entraîne correctement le VGAE, car les résultats de généralisation sont trop bons sur les autres réseaux Doré, ce qui est étonnant
|
||||||
|
|
||||||
|
- Regarder la liste des cours du MathSV et de l'Université Paris-Saclay.
|
||||||
|
|
||||||
|
- Dé-bugger les simulations :
|
||||||
|
|
||||||
|
- ⌛ Inférence : Relancer simus d'inférence avec n = 240 pour voir si la qualité augmente (se rassurer). En fait on est déjà à 240, j'ai relancé avec M = 4 au lieu de M = 2.
|
||||||
|
En attente résultats MIGALE -> BUG, dois creuser mais juste des problèmes techniques -> Visiblement il y a d'autres problèmes que juste le plan de parallélisation.
|
||||||
|
- ⌛Bon le bug ne se reproduit plus... les jobs sont juste trop longs (> 120h) j'ai relancé, il ne reste que 182/972 conditions.
|
||||||
|
|
||||||
|
- ⌛ **Plutôt regarder pour introduire un modèle $\delta$-colBiSBM**. Kmeans sur la densité des réseaux subdoré pour pré-partitionner et *clusteriser*.
|
||||||
|
Car densités déséquilibrées.
|
||||||
|
|
||||||
|
:::{#ref-kmeans-vae}
|
||||||
|
|
||||||
|
- Faire GNN-VAE Doré et sub-Doré avec kmeans et clustering sur l'espace latent
|
||||||
|
J'ai commencé à regarder un peu
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
|
### Inférence et microbes
|
||||||
|
|
||||||
|
- Lancer *colBiSBM* sur $OTU\times Sample$ → problème du chargement en mémoire des données à voir
|
||||||
|
- Lancer *colSBM* sur $OTU\times OTU$
|
||||||
|
- ✅ Creuser [TabNet](https://raw.githubusercontent.com/cregouby/R-toulouse-tabnet/main/Tabnet_RR2023_fr_pdf.pdf) de Christophe Regouby
|
||||||
|
- TabNet pratiquer les [exercices](https://github.com/cregouby/Tutoriel_torch)
|
||||||
|
- Regarder **SPARTA** Rennes
|
||||||
|
- Lire Papiers compositional data (Aitchison et al. intro)
|
||||||
|
- Lire article multi-niveaux Saint-Clair
|
||||||
|
- Ecrire et étudier les modèles pour différents niveaux taxonomiques.
|
||||||
|
\begin{align*}
|
||||||
|
i \rightarrow &~N^1_i \subseteq N^2_i \subseteq N^3_i & \text{Taxonomie}\\
|
||||||
|
Z^0_i \overset{?}{=} & Z^1_i \overset{?}{=} Z^2_i \overset{?}{=} Z^3_i & \text{Groupes fonctionnels}
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
#### Causalité
|
||||||
|
|
||||||
|
Plus sur le temps long, à regarder
|
||||||
|
|
||||||
|
- GT causalité
|
||||||
|
- Daria Bystrova lire présentation @bystrovaCausalDiscovery (Meek rules, V-structure)
|
||||||
|
|
||||||
|
## Lectures en cours 📚
|
||||||
|
|
||||||
|
### OT
|
||||||
|
- ⌛ @mazeletUnsupervisedLearningOptimal Intéressant pour le transport optimal entre graphes de tailles différentes
|
||||||
|
- ⌛ @nennaLecture2Entropic Pour comprendre le problème d'OT régularisé par l'entropie.
|
||||||
|
- ⌛ @nennaLecture1Monge
|
||||||
|
|
||||||
|
### Inférence de graphes
|
||||||
|
|
||||||
|
- ✅ @Morton2021.11.09.467939 VAE with Multinomial Logistic Normal distribution using Isometric Log Ratio transform.
|
||||||
|
Plus rapide que les autres méthodes et performances équivalentes
|
||||||
|
|
||||||
|
- ⌛ @aitchisonStatisticalAnalysisCompositional1982a
|
||||||
|
|
||||||
|
- ❗📖 @payneFiniteMixturesMultivariate2023 sur MixMPLN
|
||||||
|
|
||||||
|
### Causalité
|
||||||
|
|
||||||
|
- ❗📖 @bystrovaCausalDiscovery
|
||||||
|
|
||||||
|
## A discuter
|
||||||
|
|
||||||
|
### Congés P&S
|
||||||
|
|
||||||
|
- ✅ Quand est-ce qu'on ne se voit pas ? Et donc quand est-ce qu'on se voit après ?
|
||||||
|
- ✅ Calendrier partagé
|
||||||
|
|
||||||
|
### Thèse
|
||||||
|
|
||||||
|
- ✅ Que prévoir pour le CSI
|
||||||
|
- 👍 Un petit rapport
|
||||||
|
- 👍 Une présentation
|
||||||
|
|
||||||
|
- 👨🏫 **Demander à Pierre** Comment valider les enseignements comme formations Adum ?
|
||||||
|
|
||||||
|
- ✅ Des recommandations de formations, voir les cours du MathSV
|
||||||
|
|
||||||
|
### Interprétation écologique des résultats de Baldock
|
||||||
|
|
||||||
|
- ⌛ Point avec Elisa, **oui on relance**
|
||||||
|
|
||||||
|
### Inférence
|
||||||
|
|
||||||
|
- pbs : variance, bcp de zéros, covariables, offset et taxonomie (Réseaux arrêtés à différents niveaux : Genre, OTU ...)
|
||||||
|
|
||||||
|
> Combine networks at different taxonomic levels
|
||||||
|
|
||||||
|
- Inférence + GREMLINS
|
||||||
93
suivi/2025-27/references.bib
Normal file
|
|
@ -0,0 +1,93 @@
|
||||||
|
@article{mazeletUnsupervisedLearningOptimal,
|
||||||
|
title = {Unsupervised {{Learning}} for {{Optimal Transport}} Plan Prediction between Unbalanced Graphs},
|
||||||
|
author = {Mazelet, Sonia and Flamary, Rémi and Thirion, Bertrand},
|
||||||
|
abstract = {Optimal transport between graphs, based on Gromov-Wasserstein and other extensions, is a powerful tool for comparing and aligning graph structures. However, solving the associated non-convex optimization problems is computationally expensive, which limits the scalability of these methods to large graphs. In this work, we present Unbalanced Learning of Optimal Transport (ULOT), a deep learning method that predicts optimal transport plans between two graphs. Our method is trained by minimizing the fused unbalanced Gromov-Wasserstein (FUGW) loss. We propose a novel neural architecture with cross-attention that is conditioned on the FUGW tradeoff hyperparameters. We evaluate ULOT on synthetic stochastic block model (SBM) graphs and on real cortical surface data obtained from fMRI. ULOT predicts transport plans with competitive loss up to two orders of magnitude faster than classical solvers. Furthermore, the predicted plan can be used as a warm start for classical solvers to accelerate their convergence. Finally, the predicted transport plan is fully differentiable with respect to the graph inputs and FUGW hyperparameters, enabling the optimization of functionals of the ULOT plan.},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-11T09:08:09.864Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/HPZEYMM9/Mazelet et al. - Unsupervised Learning for Optimal Transport plan prediction between unbalanced graphs.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{nennaLecture2Entropic,
|
||||||
|
title = {Lecture 2: {{Entropic Optimal Transport}}},
|
||||||
|
author = {Nenna, Luca},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-11T16:06:28.547Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/WGFIISDB/Nenna - Lecture 2 Entropic Optimal Transport.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{nennaLecture1Monge,
|
||||||
|
title = {Lecture 1 {{Monge}} and {{Kantorovich}} Problems: From Primal to Dual},
|
||||||
|
author = {Nenna, Luca},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-13T09:24:13.832Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/7LVQPD6D/Nenna - Lecture 1 Monge and Kantorovich problems from primal to dual.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{Morton2021.11.09.467939,
|
||||||
|
title = {Scalable Estimation of Microbial Co-Occurrence Networks with {{Variational Autoencoders}}},
|
||||||
|
author = {Morton, James T. and Silverman, Justin and Tikhonov, Gleb and Lähdesmäki, Harri and Bonneau, Rich},
|
||||||
|
date = {2021},
|
||||||
|
journaltitle = {bioRxiv : the preprint server for biology},
|
||||||
|
shortjournal = {bioRxiv},
|
||||||
|
eprint = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939.full.pdf},
|
||||||
|
publisher = {Cold Spring Harbor Laboratory},
|
||||||
|
doi = {10.1101/2021.11.09.467939},
|
||||||
|
url = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939},
|
||||||
|
abstract = {Estimating microbe-microbe interactions is critical for understanding the ecological laws governing microbial communities. Rapidly decreasing sequencing costs have promised new opportunities to estimate microbe-microbe interactions across thousands of uncultured, unknown microbes. However, typical microbiome datasets are very high dimensional and accurate estimation of microbial correlations requires tens of thousands of samples, exceeding the computational capabilities of existing methodologies. Furthermore, the vast majority of microbiome studies collect compositional metagenomics data which enforces a negative bias when computing microbe-microbe correlations. The Multinomial Logistic Normal (MLN) distribution has been shown to be effective at inferring microbe-microbe correlations, however scalable Bayesian inference of these distributions has remained elusive. Here, we show that carefully constructed Variational Autoencoders (VAEs) augmented with the Isometric Log-ratio (ILR) transform can estimate low-rank MLN distributions thousands of times faster than existing methods. These VAEs can be trained on tens of thousands of samples, enabling co-occurrence inference across tens of thousands of microbes without regularization. The latent embedding distances computed from these VAEs are competitive with existing beta-diversity methods across a variety of mouse and human microbiome classification and regression tasks, with notable improvements on longitudinal studies. Competing Interest Statement: The authors have declared no competing interest.},
|
||||||
|
elocation-id = {2021.11.09.467939},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-30T14:17:29.518Z}
|
||||||
|
}
|
||||||
|
@article{aitchisonStatisticalAnalysisCompositional1982a,
|
||||||
|
title = {The {{Statistical Analysis}} of {{Compositional Data}}},
|
||||||
|
author = {Aitchison, J.},
|
||||||
|
date = {1982},
|
||||||
|
journaltitle = {Journal of the Royal Statistical Society. Series B (Methodological)},
|
||||||
|
volume = {44},
|
||||||
|
number = {2},
|
||||||
|
eprint = {2345821},
|
||||||
|
eprinttype = {jstor},
|
||||||
|
pages = {139--177},
|
||||||
|
publisher = {[Royal Statistical Society, Oxford University Press]},
|
||||||
|
issn = {0035-9246},
|
||||||
|
url = {https://www.jstor.org/stable/2345821},
|
||||||
|
urldate = {2025-05-07},
|
||||||
|
abstract = {The simplex plays an important role as sample space in many practical situations where compositional data, in the form of proportions of some whole, require interpretation. It is argued that the statistical analysis of such data has proved difficult because of a lack both of concepts of independence and of rich enough parametric classes of distributions in the simplex. A variety of independence hypotheses are introduced and interrelated, and new classes of transformed-normal distributions in the simplex are provided as models within which the independence hypotheses can be tested through standard theory of parametric hypothesis testing. The new concepts and statistical methodology are illustrated by a number of applications.},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-05-07T07:43:38.485Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/S97URH4Y/Aitchison - 1982 - The Statistical Analysis of Compositional Data.pdf}
|
||||||
|
}
|
||||||
|
@online{payneFiniteMixturesMultivariate2023,
|
||||||
|
title = {Finite {{Mixtures}} of {{Multivariate Poisson-Log Normal Factor Analyzers}} for {{Clustering Count Data}}},
|
||||||
|
author = {Payne, Andrea and Silva, Anjali and Rothstein, Steven J. and McNicholas, Paul D. and Subedi, Sanjeena},
|
||||||
|
date = {2023-11-13},
|
||||||
|
eprint = {2311.07762},
|
||||||
|
eprinttype = {arXiv},
|
||||||
|
eprintclass = {stat},
|
||||||
|
doi = {10.48550/arXiv.2311.07762},
|
||||||
|
url = {http://arxiv.org/abs/2311.07762},
|
||||||
|
urldate = {2025-07-02},
|
||||||
|
abstract = {A mixture of multivariate Poisson-log normal factor analyzers is introduced by imposing constraints on the covariance matrix, which resulted in flexible models for clustering purposes. In particular, a class of eight parsimonious mixture models based on the mixtures of factor analyzers model are introduced. Variational Gaussian approximation is used for parameter estimation, and information criteria are used for model selection. The proposed models are explored in the context of clustering discrete data arising from RNA sequencing studies. Using real and simulated data, the models are shown to give favourable clustering performance. The GitHub R package for this work is available at https://github.com/anjalisilva/mixMPLNFA and is released under the open-source MIT license.},
|
||||||
|
pubstate = {prepublished},
|
||||||
|
keywords = {/unread,Statistics - Computation,Statistics - Machine Learning,Statistics - Methodology},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-02T09:31:47.579Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/BXVPEIDD/Payne et al. - 2023 - Finite Mixtures of Multivariate Poisson-Log Normal Factor Analyzers for Clustering Count Data.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/L5DAS5C2/2311.html}
|
||||||
|
}
|
||||||
|
@unpublished{bystrovaCausalDiscovery,
|
||||||
|
title = {Causal Discovery},
|
||||||
|
author = {Bystrova, Daria},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-02T09:34:39.476Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/NQE5DY92/Bystrova - Causal discovery.pdf}
|
||||||
|
}
|
||||||
141
suivi/2025-28/2025-28.qmd
Normal file
|
|
@ -0,0 +1,141 @@
|
||||||
|
---
|
||||||
|
title: "Bilan semaine 28 2025 : 07 juillet - 11 juillet"
|
||||||
|
categories: [colBiSBM, inférence, GNN]
|
||||||
|
date: 2025-07-07
|
||||||
|
date-modified: 2025-07-11
|
||||||
|
bibliography: references.bib
|
||||||
|
---
|
||||||
|
|
||||||
|
## TODO List
|
||||||
|
|
||||||
|
- Pour clustering de collections sur données ~~réelles~~ :
|
||||||
|
→ L'intuition de Pierre semble être confirmée, les dissimilarités semblent arrêter de varier sensiblement pour de grandes valeurs $(Q_1,Q_2)$.
|
||||||
|
- ❓Je n'arrive plus à reproduire le bug pour l'inférence...
|
||||||
|
- S'assurer que ça marche et relancer
|
||||||
|
|
||||||
|
- Creuser et explorer avec easy16s !
|
||||||
|
|
||||||
|
- ⌛ **Calcul du score F1** : revérifier que j'entraîne correctement le VGAE, car les résultats de généralisation sont trop bons sur les autres réseaux Doré, ce qui est étonnant
|
||||||
|
- Ajouter au tableau comparatif sep BiSBM
|
||||||
|
- Regarder les codes Mangal database pour $\delta$
|
||||||
|
- ✅ Formules ci-dessous. Voir $\delta$ mais additif
|
||||||
|
|
||||||
|
:::{.callout-note}
|
||||||
|
### $\delta$ additif Bernoulli
|
||||||
|
En Bernoulli pas de forme analytique non plus :
|
||||||
|
Pour $\alpha_{qr}$:
|
||||||
|
$$ \sum_{m=1}^M \sum_{i=1}^{n_1^m} \sum_{j=1}^{n_2^m} \tau_{iq}^{1,m}\tau_{jr}^{2,m}(\frac{X_{ij}^m}{\alpha_{qr}} + \frac{(1-X_{ij}^m)}{\alpha_{qr} + \delta_m -1}) = 0$$
|
||||||
|
$$\Leftrightarrow \sum_m \left( \frac{e^m_{qr}}{\alpha_{qr}} + \frac{n^m_{qr}-e^m_{qr}}{\alpha_{qr}+\delta_m-1} \right) = 0$$
|
||||||
|
|
||||||
|
Et pour $\delta_m$:
|
||||||
|
$$ \sum_{i=1}^{n_1^m} \sum_{j=1}^{n_2^m} \sum_{q=1}^{Q_1} \sum_{r=1}^{Q_2} \tau_{iq}^{1,m}\tau_{jr}^{2,m}(\frac{X_{ij}^m}{\delta_{m}} + \frac{(1-X_{ij}^m)}{\alpha_{qr} + \delta_m -1}) = 0$$
|
||||||
|
:::
|
||||||
|
|
||||||
|
:::{.callout-note}
|
||||||
|
### $\delta$ additif Poisson
|
||||||
|
Forme analytique mais risque de confusion ?
|
||||||
|
$$\widehat{\delta_m} = \frac{\sum_{q,r} e^m_{qr}}{\sum_{q,r} n^m_{qr}},~\widehat{\alpha_{qr}} = \frac{\sum_{m} e^m_{qr}}{\sum_{m} n^m_{qr}} $$
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
|
- Attente retour Pierre pour faire d'autres clustering
|
||||||
|
|
||||||
|
- ✅ Implémenter décodeur Generalized Random Dot Product.
|
||||||
|
|
||||||
|
- ✅ Réimplémentation propre et évolutive du DeepBVGAE (suivi des guidelines PyTorch Geometric)
|
||||||
|
|
||||||
|
- Vérifier s'il n'y a pas de data leakage (i.e. je prends aussi les données de val et de test pour prédire ?)
|
||||||
|
|
||||||
|
|
||||||
|
- Dé-bugger les simulations :
|
||||||
|
|
||||||
|
- ✅ Inférence : Relancer simus d'inférence avec n = 240 pour voir si la qualité augmente (se rassurer). En fait on est déjà à 240, j'ai relancé avec M = 4 au lieu de M = 2.
|
||||||
|
En attente résultats MIGALE -> BUG, dois creuser mais juste des problèmes techniques -> Visiblement il y a d'autres problèmes que juste le plan de parallélisation.
|
||||||
|
- ⌛ **Plutôt regarder pour introduire un modèle $\delta$-colBiSBM**.
|
||||||
|
- Ajouter le produit par $\delta$ là où nécessaire
|
||||||
|
- Ajouter les modèles $\delta$, $\delta\pi, \dots$ et les blocs conditionnels
|
||||||
|
- Ajouter les tests unitaires adéquats et les vérifier
|
||||||
|
|
||||||
|
- 🛑**D'abord je lis la biblio dessus** Regarder *Largest gap* sur réseaux Doré
|
||||||
|
|
||||||
|
- Essayer *clustering* sur `supinfo`
|
||||||
|
|
||||||
|
- Homogénéiser notations dans les supplementaries
|
||||||
|
|
||||||
|
:::{#ref-kmeans-vae}
|
||||||
|
|
||||||
|
- Faire GNN-VAE Doré et sub-Doré avec kmeans et clustering sur l'espace latent
|
||||||
|
J'ai commencé à regarder un peu
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
|
### Inférence et microbes
|
||||||
|
|
||||||
|
- Lancer *colBiSBM* sur $OTU\times Sample$ → problème du chargement en mémoire des données à voir
|
||||||
|
- Lancer *colSBM* sur $OTU\times OTU$
|
||||||
|
- TabNet pratiquer les [exercices](https://github.com/cregouby/Tutoriel_torch)
|
||||||
|
- Regarder **SPARTA** Rennes
|
||||||
|
- Lire Papiers compositional data (Aitchison et al. intro)
|
||||||
|
- Lire article multi-niveaux Saint-Clair
|
||||||
|
- Ecrire et étudier les modèles pour différents niveaux taxonomiques.
|
||||||
|
\begin{align*}
|
||||||
|
i \rightarrow &~N^1_i \subseteq N^2_i \subseteq N^3_i & \text{Taxonomie}\\
|
||||||
|
Z^0_i \overset{?}{=} & Z^1_i \overset{?}{=} Z^2_i \overset{?}{=} Z^3_i & \text{Groupes fonctionnels}
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
#### Causalité
|
||||||
|
|
||||||
|
Plus sur le temps long, à regarder
|
||||||
|
|
||||||
|
- GT causalité
|
||||||
|
- Daria Bystrova lire présentation @bystrovaCausalDiscovery (Meek rules, V-structure)
|
||||||
|
|
||||||
|
## Biblio à faire
|
||||||
|
|
||||||
|
- Regarder Transport optimal graphes bipartite.
|
||||||
|
- HDR VB, chapitre de modèle à blocs latents, bcp travaillé sur bipartite
|
||||||
|
OT, comparaison clustering, adaptation ARI, *Largest Gap*
|
||||||
|
|
||||||
|
## Lectures en cours 📚
|
||||||
|
|
||||||
|
### OT
|
||||||
|
- ⌛ @mazeletUnsupervisedLearningOptimal Intéressant pour le transport optimal entre graphes de tailles différentes | Regarder si la régularisation entropique ne marche pas bien pour le graphe.
|
||||||
|
- ⌛ @nennaLecture2Entropic Pour comprendre le problème d'OT régularisé par l'entropie.
|
||||||
|
- ⌛ @nennaLecture1Monge
|
||||||
|
|
||||||
|
### Inférence de graphes
|
||||||
|
|
||||||
|
- ⌛ @aitchisonStatisticalAnalysisCompositional1982a, en cours
|
||||||
|
|
||||||
|
- ❗📖 @payneFiniteMixturesMultivariate2023 sur MixMPLN
|
||||||
|
|
||||||
|
### Causalité
|
||||||
|
|
||||||
|
- ❗📖 @bystrovaCausalDiscovery
|
||||||
|
|
||||||
|
### Largest Gaps
|
||||||
|
|
||||||
|
- ✅ @braultGeneralisationLalgorithmeLargest petit résumé de l'algo de @braultFastConsistentAlgorithm2023
|
||||||
|
- ❗📖 @braultFastConsistentAlgorithm2023
|
||||||
|
- ❗📖 @channarondClassificationEstimationStochastic2012 le papier qui introduit le *Largest Gaps*
|
||||||
|
|
||||||
|
## A discuter
|
||||||
|
|
||||||
|
### Congés P&S
|
||||||
|
|
||||||
|
### Thèse
|
||||||
|
|
||||||
|
- Faire préz CSI
|
||||||
|
- Faire rapport CSI
|
||||||
|
|
||||||
|
### Interprétation écologique des résultats de Baldock
|
||||||
|
|
||||||
|
- ⌛ Point avec Elisa, **oui on relance**
|
||||||
|
|
||||||
|
### Inférence
|
||||||
|
|
||||||
|
- pbs : variance, bcp de zéros, covariables, offset et taxonomie (Réseaux arrêtés à différents niveaux : Genre, OTU ...)
|
||||||
|
|
||||||
|
> Combine networks at different taxonomic levels
|
||||||
|
|
||||||
|
- Inférence + GREMLINS
|
||||||
144
suivi/2025-28/references.bib
Normal file
|
|
@ -0,0 +1,144 @@
|
||||||
|
@article{mazeletUnsupervisedLearningOptimal,
|
||||||
|
title = {Unsupervised {{Learning}} for {{Optimal Transport}} Plan Prediction between Unbalanced Graphs},
|
||||||
|
author = {Mazelet, Sonia and Flamary, Rémi and Thirion, Bertrand},
|
||||||
|
abstract = {Optimal transport between graphs, based on Gromov-Wasserstein and other extensions, is a powerful tool for comparing and aligning graph structures. However, solving the associated non-convex optimization problems is computationally expensive, which limits the scalability of these methods to large graphs. In this work, we present Unbalanced Learning of Optimal Transport (ULOT), a deep learning method that predicts optimal transport plans between two graphs. Our method is trained by minimizing the fused unbalanced Gromov-Wasserstein (FUGW) loss. We propose a novel neural architecture with cross-attention that is conditioned on the FUGW tradeoff hyperparameters. We evaluate ULOT on synthetic stochastic block model (SBM) graphs and on real cortical surface data obtained from fMRI. ULOT predicts transport plans with competitive loss up to two orders of magnitude faster than classical solvers. Furthermore, the predicted plan can be used as a warm start for classical solvers to accelerate their convergence. Finally, the predicted transport plan is fully differentiable with respect to the graph inputs and FUGW hyperparameters, enabling the optimization of functionals of the ULOT plan.},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-11T09:08:09.864Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/HPZEYMM9/Mazelet et al. - Unsupervised Learning for Optimal Transport plan prediction between unbalanced graphs.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{nennaLecture2Entropic,
|
||||||
|
title = {Lecture 2: {{Entropic Optimal Transport}}},
|
||||||
|
author = {Nenna, Luca},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-11T16:06:28.547Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/WGFIISDB/Nenna - Lecture 2 Entropic Optimal Transport.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{nennaLecture1Monge,
|
||||||
|
title = {Lecture 1 {{Monge}} and {{Kantorovich}} Problems: From Primal to Dual},
|
||||||
|
author = {Nenna, Luca},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-13T09:24:13.832Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/7LVQPD6D/Nenna - Lecture 1 Monge and Kantorovich problems from primal to dual.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{Morton2021.11.09.467939,
|
||||||
|
title = {Scalable Estimation of Microbial Co-Occurrence Networks with {{Variational Autoencoders}}},
|
||||||
|
author = {Morton, James T. and Silverman, Justin and Tikhonov, Gleb and Lähdesmäki, Harri and Bonneau, Rich},
|
||||||
|
date = {2021},
|
||||||
|
journaltitle = {bioRxiv : the preprint server for biology},
|
||||||
|
shortjournal = {bioRxiv},
|
||||||
|
eprint = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939.full.pdf},
|
||||||
|
publisher = {Cold Spring Harbor Laboratory},
|
||||||
|
doi = {10.1101/2021.11.09.467939},
|
||||||
|
url = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939},
|
||||||
|
abstract = {Estimating microbe-microbe interactions is critical for understanding the ecological laws governing microbial communities. Rapidly decreasing sequencing costs have promised new opportunities to estimate microbe-microbe interactions across thousands of uncultured, unknown microbes. However, typical microbiome datasets are very high dimensional and accurate estimation of microbial correlations requires tens of thousands of samples, exceeding the computational capabilities of existing methodologies. Furthermore, the vast majority of microbiome studies collect compositional metagenomics data which enforces a negative bias when computing microbe-microbe correlations. The Multinomial Logistic Normal (MLN) distribution has been shown to be effective at inferring microbe-microbe correlations, however scalable Bayesian inference of these distributions has remained elusive. Here, we show that carefully constructed Variational Autoencoders (VAEs) augmented with the Isometric Log-ratio (ILR) transform can estimate low-rank MLN distributions thousands of times faster than existing methods. These VAEs can be trained on tens of thousands of samples, enabling co-occurrence inference across tens of thousands of microbes without regularization. The latent embedding distances computed from these VAEs are competitive with existing beta-diversity methods across a variety of mouse and human microbiome classification and regression tasks, with notable improvements on longitudinal studies. Competing Interest Statement: The authors have declared no competing interest.},
|
||||||
|
elocation-id = {2021.11.09.467939},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-30T14:17:29.518Z}
|
||||||
|
}
|
||||||
|
@article{aitchisonStatisticalAnalysisCompositional1982a,
|
||||||
|
title = {The {{Statistical Analysis}} of {{Compositional Data}}},
|
||||||
|
author = {Aitchison, J.},
|
||||||
|
date = {1982},
|
||||||
|
journaltitle = {Journal of the Royal Statistical Society. Series B (Methodological)},
|
||||||
|
volume = {44},
|
||||||
|
number = {2},
|
||||||
|
eprint = {2345821},
|
||||||
|
eprinttype = {jstor},
|
||||||
|
pages = {139--177},
|
||||||
|
publisher = {[Royal Statistical Society, Oxford University Press]},
|
||||||
|
issn = {0035-9246},
|
||||||
|
url = {https://www.jstor.org/stable/2345821},
|
||||||
|
urldate = {2025-05-07},
|
||||||
|
abstract = {The simplex plays an important role as sample space in many practical situations where compositional data, in the form of proportions of some whole, require interpretation. It is argued that the statistical analysis of such data has proved difficult because of a lack both of concepts of independence and of rich enough parametric classes of distributions in the simplex. A variety of independence hypotheses are introduced and interrelated, and new classes of transformed-normal distributions in the simplex are provided as models within which the independence hypotheses can be tested through standard theory of parametric hypothesis testing. The new concepts and statistical methodology are illustrated by a number of applications.},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-05-07T07:43:38.485Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/S97URH4Y/Aitchison - 1982 - The Statistical Analysis of Compositional Data.pdf}
|
||||||
|
}
|
||||||
|
@online{payneFiniteMixturesMultivariate2023,
|
||||||
|
title = {Finite {{Mixtures}} of {{Multivariate Poisson-Log Normal Factor Analyzers}} for {{Clustering Count Data}}},
|
||||||
|
author = {Payne, Andrea and Silva, Anjali and Rothstein, Steven J. and McNicholas, Paul D. and Subedi, Sanjeena},
|
||||||
|
date = {2023-11-13},
|
||||||
|
eprint = {2311.07762},
|
||||||
|
eprinttype = {arXiv},
|
||||||
|
eprintclass = {stat},
|
||||||
|
doi = {10.48550/arXiv.2311.07762},
|
||||||
|
url = {http://arxiv.org/abs/2311.07762},
|
||||||
|
urldate = {2025-07-02},
|
||||||
|
abstract = {A mixture of multivariate Poisson-log normal factor analyzers is introduced by imposing constraints on the covariance matrix, which resulted in flexible models for clustering purposes. In particular, a class of eight parsimonious mixture models based on the mixtures of factor analyzers model are introduced. Variational Gaussian approximation is used for parameter estimation, and information criteria are used for model selection. The proposed models are explored in the context of clustering discrete data arising from RNA sequencing studies. Using real and simulated data, the models are shown to give favourable clustering performance. The GitHub R package for this work is available at https://github.com/anjalisilva/mixMPLNFA and is released under the open-source MIT license.},
|
||||||
|
pubstate = {prepublished},
|
||||||
|
keywords = {/unread,Statistics - Computation,Statistics - Machine Learning,Statistics - Methodology},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-02T09:31:47.579Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/BXVPEIDD/Payne et al. - 2023 - Finite Mixtures of Multivariate Poisson-Log Normal Factor Analyzers for Clustering Count Data.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/L5DAS5C2/2311.html}
|
||||||
|
}
|
||||||
|
@unpublished{bystrovaCausalDiscovery,
|
||||||
|
title = {Causal Discovery},
|
||||||
|
author = {Bystrova, Daria},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-02T09:34:39.476Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/NQE5DY92/Bystrova - Causal discovery.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@online{braultFastConsistentAlgorithm2023,
|
||||||
|
title = {Fast and {{Consistent Algorithm}} for the {{Latent Block Model}}},
|
||||||
|
author = {Brault, Vincent and Channarond, Antoine},
|
||||||
|
date = {2023-03-09},
|
||||||
|
eprint = {1610.09005},
|
||||||
|
eprinttype = {arXiv},
|
||||||
|
eprintclass = {math},
|
||||||
|
doi = {10.48550/arXiv.1610.09005},
|
||||||
|
url = {http://arxiv.org/abs/1610.09005},
|
||||||
|
urldate = {2025-07-09},
|
||||||
|
abstract = {The latent block model is used to simultaneously rank the rows and columns of a matrix to reveal a block structure. The algorithms used for estimation are often time consuming. However, recent work shows that the log-likelihood ratios are equivalent under the complete and observed (with unknown labels) models and the groups posterior distribution to converge as the size of the data increases to a Dirac mass located at the actual groups configuration. Based on these observations, the algorithm Largest Gaps is proposed in this paper to perform clustering using only the marginals of the matrix, when the number of blocks is very small with respect to the size of the whole matrix in the case of binary data. In addition, a model selection method is incorporated with a proof of its consistency. Thus, this paper shows that studying simplistic configurations (few blocks compared to the size of the matrix or very contrasting blocks) with complex algorithms is useless since the marginals already give very good parameter and classification estimates.},
|
||||||
|
langid = {english},
|
||||||
|
pubstate = {prepublished},
|
||||||
|
keywords = {/unread,Mathematics - Statistics Theory,Statistics - Computation,Statistics - Statistics Theory},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T13:58:53.533Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/5LXC6Y68/Brault et Channarond - 2023 - Fast and Consistent Algorithm for the Latent Block Model.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{braultGeneralisationLalgorithmeLargest,
|
||||||
|
title = {Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique},
|
||||||
|
author = {Brault, Vincent and Channarond, Antoine and Robert, Valérie},
|
||||||
|
abstract = {The latent block model assumes there exists a distribution for each crossing between an object cluster and a variable cluster of a data table ; the cells are supposed to be independent conditionally to the choice of these clusters. To estimate the model parameters, most of algorithms are time consuming. Brault and Channarond (2016) proposed to adapt the Largest Gaps algorithm which consists in using the margins. They thus obtained a procedure which estimates all the model parameters consistently but requires a large number of observations. In this talk, we will extend the procedure to the case of any distribution having a second order moment by using an EM algorithm estimation.},
|
||||||
|
langid = {french},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T12:29:43.098Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/GIUNC4L3/Brault et al. - Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{channarondClassificationEstimationStochastic2012,
|
||||||
|
title = {Classification and Estimation in the {{Stochastic Blockmodel}} Based on the Empirical Degrees},
|
||||||
|
author = {Channarond, Antoine and Daudin, Jean-Jacques and Robin, Stéphane},
|
||||||
|
date = {2012-01-01},
|
||||||
|
journaltitle = {Electronic Journal of Statistics},
|
||||||
|
shortjournal = {Electron. J. Statist.},
|
||||||
|
volume = {6},
|
||||||
|
publisher = {Institute of Mathematical Statistics},
|
||||||
|
issn = {1935-7524},
|
||||||
|
doi = {10.1214/12-ejs753},
|
||||||
|
url = {https://projecteuclid.org/journals/electronic-journal-of-statistics/volume-6/issue-none/Classification-and-estimation-in-the-Stochastic-Blockmodel-based-on-the/10.1214/12-EJS753.full},
|
||||||
|
urldate = {2025-07-09},
|
||||||
|
abstract = {The Stochastic Blockmodel [16] is a mixture model for heterogeneous network data. Unlike the usual statistical framework, new nodes give additional information about the previous ones in this model. Thereby the distribution of the degrees concentrates in points conditionally on the node class. We show under a mild assumption that classification, estimation and model selection can actually be achieved with no more than the empirical degree data. We provide an algorithm able to process very large networks and consistent estimators based on it. In particular, we prove a bound of the probability of misclassification of at least one node, including when the number of classes grows.},
|
||||||
|
issue = {none},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T13:59:33.921Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/8TL8AJ2G/Channarond et al. - 2012 - Classification and estimation in the Stochastic Blockmodel based on the empirical degrees.pdf}
|
||||||
|
}
|
||||||
478
suivi/2025-29/2025-29.qmd
Normal file
|
|
@ -0,0 +1,478 @@
|
||||||
|
---
|
||||||
|
title: "Bilan semaine 29 2025 : 15 juillet - 18 juillet"
|
||||||
|
categories: [colBiSBM, inférence, GNN]
|
||||||
|
date: 2025-07-15
|
||||||
|
date-modified: last-modified
|
||||||
|
bibliography: references.bib
|
||||||
|
---
|
||||||
|
|
||||||
|
## TODO List
|
||||||
|
|
||||||
|
- Pour clustering de collections sur données ~~réelles~~ :
|
||||||
|
→ L'intuition de Pierre semble être confirmée, les dissimilarités semblent arrêter de varier sensiblement pour de grandes valeurs $(Q_1,Q_2)$.
|
||||||
|
- ❓Je n'arrive plus à reproduire le bug pour l'inférence...
|
||||||
|
- 😫 bug encore. S'assurer que ça marche et relancer
|
||||||
|
|
||||||
|
- ⌛ **En fait je donne tous les degrés donc le GNN a juste à retrouver les arêtes non vues**. Revérifier que j'entraîne correctement le VGAE car résultats de généralisation trop bons sur les autres réseaux Doré, ce qui est étonnant
|
||||||
|
Pour corriger cet effet :
|
||||||
|
- Donner la matrice identité comme features
|
||||||
|
- Corriger les degrés calculés.
|
||||||
|
- ✅ Ajouter au tableau comparatif sep BiSBM
|
||||||
|
|
||||||
|
- Pour s'assurer que colBiSBM marche, il faut comparer avec une proportion de :
|
||||||
|
- *Missing links*, ie des faux zéros
|
||||||
|
- *NA* en *Missing at random (MAR)*
|
||||||
|
|
||||||
|
- Faibles performances de l'inférence :
|
||||||
|
- Vérifier que les conditions d'identifiabilité des modèles fautifs sont bien remplies.
|
||||||
|
- Récupérer des jeux de paramètres et essayer de reproduire les résultats.
|
||||||
|
|
||||||
|
- Clustering sur Doré :
|
||||||
|
- Désagréger les réseaux et relancer le clustering sur certains auteurs.
|
||||||
|
- Regarder pour les couples date+nom les études et le nombre de réseaux analysables (Possible demander à Élisa)
|
||||||
|
- Clusteriser sur la base des noms et voir parmi les réseaux Européens (désagrégés ?)
|
||||||
|
- Si M > 10, alors voir si je retrouve les mêmes résultats que dans les études.
|
||||||
|
|
||||||
|
<table>
|
||||||
|
<caption>AUC values for colBiSBM, sep-BiSBM and VGAE models across cities</caption>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th style="empty-cells: hide;border-bottom:hidden;" colspan="1"></th>
|
||||||
|
<th style="border-bottom:hidden;padding-bottom:0; padding-left:3px;padding-right:3px;text-align: center; " colspan="3"><div style="border-bottom: 1px solid #ddd; padding-bottom: 5px; ">AUC</div></th>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<th style="text-align:left;"> City </th>
|
||||||
|
<th style="text-align:right;"> colBiSBM </th>
|
||||||
|
<th style="text-align:right;"> sep-BiSBM </th>
|
||||||
|
<th style="text-align:right;"> Untuned VGAE </th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:left;"> Bristol </td>
|
||||||
|
<td style="text-align:right;"> 0.841 </td>
|
||||||
|
<td style="text-align:right;"> 0.824 </td>
|
||||||
|
<td style="text-align:right;"> 1 </td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:left;"> Edinburgh </td>
|
||||||
|
<td style="text-align:right;"> 0.882 </td>
|
||||||
|
<td style="text-align:right;"> 0.883 </td>
|
||||||
|
<td style="text-align:right;"> 1 </td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:left;"> Leeds </td>
|
||||||
|
<td style="text-align:right;"> 0.873 </td>
|
||||||
|
<td style="text-align:right;"> 0.852 </td>
|
||||||
|
<td style="text-align:right;"> 1 </td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:left;"> Reading </td>
|
||||||
|
<td style="text-align:right;"> 0.845 </td>
|
||||||
|
<td style="text-align:right;"> 0.837 </td>
|
||||||
|
<td style="text-align:right;"> 1 </td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
- Regarder les codes Mangal database pour $\delta$
|
||||||
|
- Voir $\delta$ mais additif
|
||||||
|
|
||||||
|
:::{.callout-note}
|
||||||
|
### $\delta$ additif Bernoulli
|
||||||
|
En Bernoulli pas de forme analytique non plus :
|
||||||
|
Pour $\alpha_{qr}$:
|
||||||
|
$$ \sum_{m=1}^M \sum_{i=1}^{n_1^m} \sum_{j=1}^{n_2^m} \tau_{iq}^{1,m}\tau_{jr}^{2,m}(\frac{X_{ij}^m}{\alpha_{qr}} + \frac{(1-X_{ij}^m)}{\alpha_{qr} + \delta_m -1}) = 0$$
|
||||||
|
$$\Leftrightarrow \sum_m \frac{e^m_{qr}}{\alpha_{qr}} + \frac{1}{\alpha_{qr}+\delta_m-1} (n^m_{qr}-e^m_{qr}) = 0$$
|
||||||
|
|
||||||
|
Et pour $\delta_m$:
|
||||||
|
$$ \sum_{i=1}^{n_1^m} \sum_{j=1}^{n_2^m} \sum_{q=1}^{Q_1} \sum_{r=1}^{Q_2} \tau_{iq}^{1,m}\tau_{jr}^{2,m}(\frac{X_{ij}^m}{\delta_{m}} + \frac{(1-X_{ij}^m)}{\alpha_{qr} + \delta_m -1}) = 0$$
|
||||||
|
:::
|
||||||
|
|
||||||
|
:::{.callout-note}
|
||||||
|
### $\delta$ additif Poisson
|
||||||
|
Forme analytique mais risque de confusion ?
|
||||||
|
$$\widehat{\delta_m} = \frac{\sum_{q,r} e^m_{qr}}{\sum_{q,r} n^m_{qr}},~\widehat{\alpha_{qr}} = \frac{\sum_{m} e^m_{qr}}{\sum_{m} n^m_{qr}} $$
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
|
- Attente retour Pierre pour faire d'autres clustering
|
||||||
|
|
||||||
|
- Regarder la liste des cours du MathSV et de l'Université Paris-Saclay.
|
||||||
|
|
||||||
|
- Inférence finie mais résultats pas fous:
|
||||||
|
|
||||||
|
<table class="table" style="font-size: 10px; margin-left: auto; margin-right: auto;">
|
||||||
|
<caption style="font-size: initial !important;">The proportion of datasets for which the correct number of blocks is selected.</caption>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th style="empty-cells: hide;border-bottom:hidden;" colspan="1"></th>
|
||||||
|
<th style="border-bottom:hidden;padding-bottom:0; padding-left:0px;padding-right:0px;text-align: center; " colspan="6"><div style="border-bottom: 1px solid #ddd; padding-bottom: 5px; ">iid</div></th>
|
||||||
|
<th style="border-bottom:hidden;padding-bottom:0; padding-left:0px;padding-right:0px;text-align: center; " colspan="6"><div style="border-bottom: 1px solid #ddd; padding-bottom: 5px; ">$\pi$</div></th>
|
||||||
|
<th style="border-bottom:hidden;padding-bottom:0; padding-left:0px;padding-right:0px;text-align: center; " colspan="6"><div style="border-bottom: 1px solid #ddd; padding-bottom: 5px; ">$\rho$</div></th>
|
||||||
|
<th style="border-bottom:hidden;padding-bottom:0; padding-left:0px;padding-right:0px;text-align: center; " colspan="6"><div style="border-bottom: 1px solid #ddd; padding-bottom: 5px; ">$\pi\rho$</div></th>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<th style="text-align:right;"> $\epsilon_{\alpha}$ </th>
|
||||||
|
<th style="text-align:left;"> $\bm{1}_{\widehat{Q_1} \lt 4}$ </th>
|
||||||
|
<th style="text-align:left;"> $\bm{1}_{\widehat{Q_1} = 4}$ </th>
|
||||||
|
<th style="text-align:left;"> $\bm{1}_{\widehat{Q_1} \gt 4}$ </th>
|
||||||
|
<th style="text-align:left;"> $\bm{1}_{\widehat{Q_2} \lt 4}$ </th>
|
||||||
|
<th style="text-align:left;"> $\bm{1}_{\widehat{Q_2} = 4}$ </th>
|
||||||
|
<th style="text-align:left;"> $\bm{1}_{\widehat{Q_2} \gt 4}$ </th>
|
||||||
|
<th style="text-align:left;"> $\bm{1}_{\widehat{Q_1} \lt 4}$ </th>
|
||||||
|
<th style="text-align:left;"> $\bm{1}_{\widehat{Q_1} = 4}$ </th>
|
||||||
|
<th style="text-align:left;"> $\bm{1}_{\widehat{Q_1} \gt 4}$ </th>
|
||||||
|
<th style="text-align:left;"> $\bm{1}_{\widehat{Q_2} \lt 4}$ </th>
|
||||||
|
<th style="text-align:left;"> $\bm{1}_{\widehat{Q_2} = 4}$ </th>
|
||||||
|
<th style="text-align:left;"> $\bm{1}_{\widehat{Q_2} \gt 4}$ </th>
|
||||||
|
<th style="text-align:left;"> $\bm{1}_{\widehat{Q_1} \lt 4}$ </th>
|
||||||
|
<th style="text-align:left;"> $\bm{1}_{\widehat{Q_1} = 4}$ </th>
|
||||||
|
<th style="text-align:left;"> $\bm{1}_{\widehat{Q_1} \gt 4}$ </th>
|
||||||
|
<th style="text-align:left;"> $\bm{1}_{\widehat{Q_2} \lt 4}$ </th>
|
||||||
|
<th style="text-align:left;"> $\bm{1}_{\widehat{Q_2} = 4}$ </th>
|
||||||
|
<th style="text-align:left;"> $\bm{1}_{\widehat{Q_2} \gt 4}$ </th>
|
||||||
|
<th style="text-align:left;"> $\bm{1}_{\widehat{Q_1} \lt 4}$ </th>
|
||||||
|
<th style="text-align:left;"> $\bm{1}_{\widehat{Q_1} = 4}$ </th>
|
||||||
|
<th style="text-align:left;"> $\bm{1}_{\widehat{Q_1} \gt 4}$ </th>
|
||||||
|
<th style="text-align:left;"> $\bm{1}_{\widehat{Q_2} \lt 4}$ </th>
|
||||||
|
<th style="text-align:left;"> $\bm{1}_{\widehat{Q_2} = 4}$ </th>
|
||||||
|
<th style="text-align:left;"> $\bm{1}_{\widehat{Q_2} \gt 4}$ </th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:right;"> 0.00 </td>
|
||||||
|
<td style="text-align:left;"> 1 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 1 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 1 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 1 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 1 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 1 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 1 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 1 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:right;"> 0.03 </td>
|
||||||
|
<td style="text-align:left;"> 1 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 1 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 1 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 1 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 1 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 1 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 1 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 1 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:right;"> 0.06 </td>
|
||||||
|
<td style="text-align:left;"> 0.19 $\pm$ 0.04 </td>
|
||||||
|
<td style="text-align:left;"> 0.81 $\pm$ 0.04 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.24 $\pm$ 0.04 </td>
|
||||||
|
<td style="text-align:left;"> 0.76 $\pm$ 0.04 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.02 $\pm$ 0.01 </td>
|
||||||
|
<td style="text-align:left;"> 0.33 $\pm$ 0.05 </td>
|
||||||
|
<td style="text-align:left;"> 0.65 $\pm$ 0.05 </td>
|
||||||
|
<td style="text-align:left;"> 0.26 $\pm$ 0.04 </td>
|
||||||
|
<td style="text-align:left;"> 0.74 $\pm$ 0.04 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.17 $\pm$ 0.04 </td>
|
||||||
|
<td style="text-align:left;"> 0.83 $\pm$ 0.04 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.02 $\pm$ 0.01 </td>
|
||||||
|
<td style="text-align:left;"> 0.2 $\pm$ 0.04 </td>
|
||||||
|
<td style="text-align:left;"> 0.78 $\pm$ 0.04 </td>
|
||||||
|
<td style="text-align:left;"> 0.06 $\pm$ 0.02 </td>
|
||||||
|
<td style="text-align:left;"> 0.87 $\pm$ 0.03 </td>
|
||||||
|
<td style="text-align:left;"> 0.06 $\pm$ 0.02 </td>
|
||||||
|
<td style="text-align:left;"> 0.01 $\pm$ 0.01 </td>
|
||||||
|
<td style="text-align:left;"> 0.88 $\pm$ 0.03 </td>
|
||||||
|
<td style="text-align:left;"> 0.11 $\pm$ 0.03 </td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:right;"> 0.09 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.94 $\pm$ 0.02 </td>
|
||||||
|
<td style="text-align:left;"> 0.06 $\pm$ 0.02 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.91 $\pm$ 0.03 </td>
|
||||||
|
<td style="text-align:left;"> 0.09 $\pm$ 0.03 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.1 $\pm$ 0.03 </td>
|
||||||
|
<td style="text-align:left;"> 0.9 $\pm$ 0.03 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 1 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 1 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.13 $\pm$ 0.03 </td>
|
||||||
|
<td style="text-align:left;"> 0.87 $\pm$ 0.03 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.87 $\pm$ 0.03 </td>
|
||||||
|
<td style="text-align:left;"> 0.13 $\pm$ 0.03 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.87 $\pm$ 0.03 </td>
|
||||||
|
<td style="text-align:left;"> 0.13 $\pm$ 0.03 </td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:right;"> 0.12 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.94 $\pm$ 0.02 </td>
|
||||||
|
<td style="text-align:left;"> 0.06 $\pm$ 0.02 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.91 $\pm$ 0.03 </td>
|
||||||
|
<td style="text-align:left;"> 0.09 $\pm$ 0.03 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.26 $\pm$ 0.04 </td>
|
||||||
|
<td style="text-align:left;"> 0.74 $\pm$ 0.04 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 1 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 1 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.3 $\pm$ 0.04 </td>
|
||||||
|
<td style="text-align:left;"> 0.7 $\pm$ 0.04 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.84 $\pm$ 0.04 </td>
|
||||||
|
<td style="text-align:left;"> 0.16 $\pm$ 0.04 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.83 $\pm$ 0.04 </td>
|
||||||
|
<td style="text-align:left;"> 0.17 $\pm$ 0.04 </td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:right;"> 0.15 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.85 $\pm$ 0.03 </td>
|
||||||
|
<td style="text-align:left;"> 0.15 $\pm$ 0.03 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.86 $\pm$ 0.03 </td>
|
||||||
|
<td style="text-align:left;"> 0.14 $\pm$ 0.03 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.34 $\pm$ 0.05 </td>
|
||||||
|
<td style="text-align:left;"> 0.66 $\pm$ 0.05 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 1 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 1 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.3 $\pm$ 0.04 </td>
|
||||||
|
<td style="text-align:left;"> 0.7 $\pm$ 0.04 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.81 $\pm$ 0.04 </td>
|
||||||
|
<td style="text-align:left;"> 0.19 $\pm$ 0.04 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.8 $\pm$ 0.04 </td>
|
||||||
|
<td style="text-align:left;"> 0.2 $\pm$ 0.04 </td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:right;"> 0.18 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.87 $\pm$ 0.03 </td>
|
||||||
|
<td style="text-align:left;"> 0.13 $\pm$ 0.03 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.84 $\pm$ 0.04 </td>
|
||||||
|
<td style="text-align:left;"> 0.16 $\pm$ 0.04 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.36 $\pm$ 0.05 </td>
|
||||||
|
<td style="text-align:left;"> 0.64 $\pm$ 0.05 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 1 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 1 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.35 $\pm$ 0.05 </td>
|
||||||
|
<td style="text-align:left;"> 0.65 $\pm$ 0.05 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.87 $\pm$ 0.03 </td>
|
||||||
|
<td style="text-align:left;"> 0.13 $\pm$ 0.03 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.82 $\pm$ 0.04 </td>
|
||||||
|
<td style="text-align:left;"> 0.18 $\pm$ 0.04 </td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:right;"> 0.21 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.92 $\pm$ 0.03 </td>
|
||||||
|
<td style="text-align:left;"> 0.08 $\pm$ 0.03 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.89 $\pm$ 0.03 </td>
|
||||||
|
<td style="text-align:left;"> 0.11 $\pm$ 0.03 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.4 $\pm$ 0.05 </td>
|
||||||
|
<td style="text-align:left;"> 0.6 $\pm$ 0.05 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 1 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 1 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.39 $\pm$ 0.05 </td>
|
||||||
|
<td style="text-align:left;"> 0.61 $\pm$ 0.05 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.84 $\pm$ 0.04 </td>
|
||||||
|
<td style="text-align:left;"> 0.16 $\pm$ 0.04 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.84 $\pm$ 0.04 </td>
|
||||||
|
<td style="text-align:left;"> 0.16 $\pm$ 0.04 </td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:right;"> 0.24 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.88 $\pm$ 0.03 </td>
|
||||||
|
<td style="text-align:left;"> 0.12 $\pm$ 0.03 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.85 $\pm$ 0.03 </td>
|
||||||
|
<td style="text-align:left;"> 0.15 $\pm$ 0.03 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.47 $\pm$ 0.05 </td>
|
||||||
|
<td style="text-align:left;"> 0.53 $\pm$ 0.05 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 1 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.99 $\pm$ 0.01 </td>
|
||||||
|
<td style="text-align:left;"> 0.01 $\pm$ 0.01 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.4 $\pm$ 0.05 </td>
|
||||||
|
<td style="text-align:left;"> 0.6 $\pm$ 0.05 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.85 $\pm$ 0.03 </td>
|
||||||
|
<td style="text-align:left;"> 0.15 $\pm$ 0.03 </td>
|
||||||
|
<td style="text-align:left;"> 0 </td>
|
||||||
|
<td style="text-align:left;"> 0.82 $\pm$ 0.04 </td>
|
||||||
|
<td style="text-align:left;"> 0.18 $\pm$ 0.04 </td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
- ⌛ **Plutôt regarder pour introduire un modèle $\delta$-colBiSBM**.
|
||||||
|
- Ajouter le produit par $\delta$ là où nécessaire
|
||||||
|
- Ajouter les modèles $\delta$, $\delta\pi, \dots$ et les blocs conditionnels
|
||||||
|
- Ajouter les tests unitaires adéquats et les vérifier
|
||||||
|
|
||||||
|
- Regarder *Largest gap* sur réseaux Doré
|
||||||
|
|
||||||
|
- Essayer *clustering* sur `supinfo`
|
||||||
|
|
||||||
|
- Homogénéiser notations dans les supplementaries
|
||||||
|
|
||||||
|
:::{#ref-kmeans-vae}
|
||||||
|
|
||||||
|
- Faire GNN-VAE Doré et sub-Doré avec kmeans et clustering sur l'espace latent
|
||||||
|
J'ai commencé à regarder un peu
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
|
- Creuser et explorer avec easy16s !
|
||||||
|
|
||||||
|
### Inférence et microbes
|
||||||
|
|
||||||
|
- Lancer *colBiSBM* sur $OTU\times Sample$ → problème du chargement en mémoire des données à voir
|
||||||
|
- Lancer *colSBM* sur $OTU\times OTU$
|
||||||
|
- TabNet pratiquer les [exercices](https://github.com/cregouby/Tutoriel_torch)
|
||||||
|
- Regarder **SPARTA** Rennes
|
||||||
|
- Lire Papiers compositional data (Aitchison et al. intro)
|
||||||
|
- Lire article multi-niveaux Saint-Clair
|
||||||
|
- Écrire et étudier les modèles pour différents niveaux taxonomiques.
|
||||||
|
\begin{align*}
|
||||||
|
i \rightarrow &~N^1_i \subseteq N^2_i \subseteq N^3_i & \text{Taxonomie}\\
|
||||||
|
Z^0_i \overset{?}{=} & Z^1_i \overset{?}{=} Z^2_i \overset{?}{=} Z^3_i & \text{Groupes fonctionnels}
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
#### Causalité
|
||||||
|
|
||||||
|
Plus sur le temps long, à regarder
|
||||||
|
|
||||||
|
- GT causalité
|
||||||
|
- Daria Bystrova lire présentation @bystrovaCausalDiscovery (Meek rules, V-structure)
|
||||||
|
|
||||||
|
## Biblio à faire
|
||||||
|
|
||||||
|
- Regarder Transport optimal graphes bipartites.
|
||||||
|
- HDR VB, chapitre de modèle à blocs latents, bcp travaillé sur bipartite
|
||||||
|
OT, comparaison clustering, adaptation ARI, *Largest Gap*
|
||||||
|
|
||||||
|
## Lectures en cours 📚
|
||||||
|
|
||||||
|
### OT
|
||||||
|
- ⌛ @mazeletUnsupervisedLearningOptimal Intéressant pour le transport optimal entre graphes de tailles différentes | Regarder si régularisation entropique ne marche pas bien pour le graphe.
|
||||||
|
- ⌛ @nennaLecture2Entropic Pour comprendre le problème d'OT régularisé par l'entropie.
|
||||||
|
- ⌛ @nennaLecture1Monge
|
||||||
|
|
||||||
|
### Inférence de graphes
|
||||||
|
|
||||||
|
- ⌛ @aitchisonStatisticalAnalysisCompositional1982a, en cours
|
||||||
|
|
||||||
|
- ❗📖 @payneFiniteMixturesMultivariate2023 sur MixMPLN
|
||||||
|
|
||||||
|
### Causalité
|
||||||
|
|
||||||
|
- ❗📖 @bystrovaCausalDiscovery
|
||||||
|
|
||||||
|
### Largest Gaps
|
||||||
|
|
||||||
|
- ❗📖 @braultFastConsistentAlgorithm2023
|
||||||
|
- ❗📖 @channarondClassificationEstimationStochastic2012 le papier qui introduit le *Largest Gaps*
|
||||||
|
|
||||||
|
## A discuter
|
||||||
|
|
||||||
|
### Congés P&S
|
||||||
|
|
||||||
|
### Thèse
|
||||||
|
|
||||||
|
- Faire préz CSI
|
||||||
|
- Faire rapport CSI
|
||||||
|
|
||||||
|
### Interprétation écologique des résultats de Baldock
|
||||||
|
|
||||||
|
- ⌛ Point avec Elisa, **oui on relance**
|
||||||
|
|
||||||
|
### Inférence
|
||||||
|
|
||||||
|
- pbs : variance, bcp de zéros, covariables, offset et taxonomie (Réseaux arêtes différents niveaux : Genre, OTU ...)
|
||||||
|
|
||||||
|
> Combine networks at different taxonomic levels
|
||||||
|
|
||||||
|
- Inférence + GREMLINS
|
||||||
144
suivi/2025-29/references.bib
Normal file
|
|
@ -0,0 +1,144 @@
|
||||||
|
@article{mazeletUnsupervisedLearningOptimal,
|
||||||
|
title = {Unsupervised {{Learning}} for {{Optimal Transport}} Plan Prediction between Unbalanced Graphs},
|
||||||
|
author = {Mazelet, Sonia and Flamary, Rémi and Thirion, Bertrand},
|
||||||
|
abstract = {Optimal transport between graphs, based on Gromov-Wasserstein and other extensions, is a powerful tool for comparing and aligning graph structures. However, solving the associated non-convex optimization problems is computationally expensive, which limits the scalability of these methods to large graphs. In this work, we present Unbalanced Learning of Optimal Transport (ULOT), a deep learning method that predicts optimal transport plans between two graphs. Our method is trained by minimizing the fused unbalanced Gromov-Wasserstein (FUGW) loss. We propose a novel neural architecture with cross-attention that is conditioned on the FUGW tradeoff hyperparameters. We evaluate ULOT on synthetic stochastic block model (SBM) graphs and on real cortical surface data obtained from fMRI. ULOT predicts transport plans with competitive loss up to two orders of magnitude faster than classical solvers. Furthermore, the predicted plan can be used as a warm start for classical solvers to accelerate their convergence. Finally, the predicted transport plan is fully differentiable with respect to the graph inputs and FUGW hyperparameters, enabling the optimization of functionals of the ULOT plan.},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-11T09:08:09.864Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/HPZEYMM9/Mazelet et al. - Unsupervised Learning for Optimal Transport plan prediction between unbalanced graphs.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{nennaLecture2Entropic,
|
||||||
|
title = {Lecture 2: {{Entropic Optimal Transport}}},
|
||||||
|
author = {Nenna, Luca},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-11T16:06:28.547Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/WGFIISDB/Nenna - Lecture 2 Entropic Optimal Transport.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{nennaLecture1Monge,
|
||||||
|
title = {Lecture 1 {{Monge}} and {{Kantorovich}} Problems: From Primal to Dual},
|
||||||
|
author = {Nenna, Luca},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-13T09:24:13.832Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/7LVQPD6D/Nenna - Lecture 1 Monge and Kantorovich problems from primal to dual.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{Morton2021.11.09.467939,
|
||||||
|
title = {Scalable Estimation of Microbial Co-Occurrence Networks with {{Variational Autoencoders}}},
|
||||||
|
author = {Morton, James T. and Silverman, Justin and Tikhonov, Gleb and Lähdesmäki, Harri and Bonneau, Rich},
|
||||||
|
date = {2021},
|
||||||
|
journaltitle = {bioRxiv : the preprint server for biology},
|
||||||
|
shortjournal = {bioRxiv},
|
||||||
|
eprint = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939.full.pdf},
|
||||||
|
publisher = {Cold Spring Harbor Laboratory},
|
||||||
|
doi = {10.1101/2021.11.09.467939},
|
||||||
|
url = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939},
|
||||||
|
abstract = {Estimating microbe-microbe interactions is critical for understanding the ecological laws governing microbial communities. Rapidly decreasing sequencing costs have promised new opportunities to estimate microbe-microbe interactions across thousands of uncultured, unknown microbes. However, typical microbiome datasets are very high dimensional and accurate estimation of microbial correlations requires tens of thousands of samples, exceeding the computational capabilities of existing methodologies. Furthermore, the vast majority of microbiome studies collect compositional metagenomics data which enforces a negative bias when computing microbe-microbe correlations. The Multinomial Logistic Normal (MLN) distribution has been shown to be effective at inferring microbe-microbe correlations, however scalable Bayesian inference of these distributions has remained elusive. Here, we show that carefully constructed Variational Autoencoders (VAEs) augmented with the Isometric Log-ratio (ILR) transform can estimate low-rank MLN distributions thousands of times faster than existing methods. These VAEs can be trained on tens of thousands of samples, enabling co-occurrence inference across tens of thousands of microbes without regularization. The latent embedding distances computed from these VAEs are competitive with existing beta-diversity methods across a variety of mouse and human microbiome classification and regression tasks, with notable improvements on longitudinal studies. Competing Interest Statement: The authors have declared no competing interest.},
|
||||||
|
elocation-id = {2021.11.09.467939},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-30T14:17:29.518Z}
|
||||||
|
}
|
||||||
|
@article{aitchisonStatisticalAnalysisCompositional1982a,
|
||||||
|
title = {The {{Statistical Analysis}} of {{Compositional Data}}},
|
||||||
|
author = {Aitchison, J.},
|
||||||
|
date = {1982},
|
||||||
|
journaltitle = {Journal of the Royal Statistical Society. Series B (Methodological)},
|
||||||
|
volume = {44},
|
||||||
|
number = {2},
|
||||||
|
eprint = {2345821},
|
||||||
|
eprinttype = {jstor},
|
||||||
|
pages = {139--177},
|
||||||
|
publisher = {[Royal Statistical Society, Oxford University Press]},
|
||||||
|
issn = {0035-9246},
|
||||||
|
url = {https://www.jstor.org/stable/2345821},
|
||||||
|
urldate = {2025-05-07},
|
||||||
|
abstract = {The simplex plays an important role as sample space in many practical situations where compositional data, in the form of proportions of some whole, require interpretation. It is argued that the statistical analysis of such data has proved difficult because of a lack both of concepts of independence and of rich enough parametric classes of distributions in the simplex. A variety of independence hypotheses are introduced and interrelated, and new classes of transformed-normal distributions in the simplex are provided as models within which the independence hypotheses can be tested through standard theory of parametric hypothesis testing. The new concepts and statistical methodology are illustrated by a number of applications.},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-05-07T07:43:38.485Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/S97URH4Y/Aitchison - 1982 - The Statistical Analysis of Compositional Data.pdf}
|
||||||
|
}
|
||||||
|
@online{payneFiniteMixturesMultivariate2023,
|
||||||
|
title = {Finite {{Mixtures}} of {{Multivariate Poisson-Log Normal Factor Analyzers}} for {{Clustering Count Data}}},
|
||||||
|
author = {Payne, Andrea and Silva, Anjali and Rothstein, Steven J. and McNicholas, Paul D. and Subedi, Sanjeena},
|
||||||
|
date = {2023-11-13},
|
||||||
|
eprint = {2311.07762},
|
||||||
|
eprinttype = {arXiv},
|
||||||
|
eprintclass = {stat},
|
||||||
|
doi = {10.48550/arXiv.2311.07762},
|
||||||
|
url = {http://arxiv.org/abs/2311.07762},
|
||||||
|
urldate = {2025-07-02},
|
||||||
|
abstract = {A mixture of multivariate Poisson-log normal factor analyzers is introduced by imposing constraints on the covariance matrix, which resulted in flexible models for clustering purposes. In particular, a class of eight parsimonious mixture models based on the mixtures of factor analyzers model are introduced. Variational Gaussian approximation is used for parameter estimation, and information criteria are used for model selection. The proposed models are explored in the context of clustering discrete data arising from RNA sequencing studies. Using real and simulated data, the models are shown to give favourable clustering performance. The GitHub R package for this work is available at https://github.com/anjalisilva/mixMPLNFA and is released under the open-source MIT license.},
|
||||||
|
pubstate = {prepublished},
|
||||||
|
keywords = {/unread,Statistics - Computation,Statistics - Machine Learning,Statistics - Methodology},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-02T09:31:47.579Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/BXVPEIDD/Payne et al. - 2023 - Finite Mixtures of Multivariate Poisson-Log Normal Factor Analyzers for Clustering Count Data.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/L5DAS5C2/2311.html}
|
||||||
|
}
|
||||||
|
@unpublished{bystrovaCausalDiscovery,
|
||||||
|
title = {Causal Discovery},
|
||||||
|
author = {Bystrova, Daria},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-02T09:34:39.476Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/NQE5DY92/Bystrova - Causal discovery.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@online{braultFastConsistentAlgorithm2023,
|
||||||
|
title = {Fast and {{Consistent Algorithm}} for the {{Latent Block Model}}},
|
||||||
|
author = {Brault, Vincent and Channarond, Antoine},
|
||||||
|
date = {2023-03-09},
|
||||||
|
eprint = {1610.09005},
|
||||||
|
eprinttype = {arXiv},
|
||||||
|
eprintclass = {math},
|
||||||
|
doi = {10.48550/arXiv.1610.09005},
|
||||||
|
url = {http://arxiv.org/abs/1610.09005},
|
||||||
|
urldate = {2025-07-09},
|
||||||
|
abstract = {The latent block model is used to simultaneously rank the rows and columns of a matrix to reveal a block structure. The algorithms used for estimation are often time consuming. However, recent work shows that the log-likelihood ratios are equivalent under the complete and observed (with unknown labels) models and the groups posterior distribution to converge as the size of the data increases to a Dirac mass located at the actual groups configuration. Based on these observations, the algorithm Largest Gaps is proposed in this paper to perform clustering using only the marginals of the matrix, when the number of blocks is very small with respect to the size of the whole matrix in the case of binary data. In addition, a model selection method is incorporated with a proof of its consistency. Thus, this paper shows that studying simplistic configurations (few blocks compared to the size of the matrix or very contrasting blocks) with complex algorithms is useless since the marginals already give very good parameter and classification estimates.},
|
||||||
|
langid = {english},
|
||||||
|
pubstate = {prepublished},
|
||||||
|
keywords = {/unread,Mathematics - Statistics Theory,Statistics - Computation,Statistics - Statistics Theory},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T13:58:53.533Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/5LXC6Y68/Brault et Channarond - 2023 - Fast and Consistent Algorithm for the Latent Block Model.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{braultGeneralisationLalgorithmeLargest,
|
||||||
|
title = {Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique},
|
||||||
|
author = {Brault, Vincent and Channarond, Antoine and Robert, Valérie},
|
||||||
|
abstract = {The latent block model assumes there exists a distribution for each crossing between an object cluster and a variable cluster of a data table ; the cells are supposed to be independent conditionally to the choice of these clusters. To estimate the model parameters, most of algorithms are time consuming. Brault and Channarond (2016) proposed to adapt the Largest Gaps algorithm which consists in using the margins. They thus obtained a procedure which estimates all the model parameters consistently but requires a large number of observations. In this talk, we will extend the procedure to the case of any distribution having a second order moment by using an EM algorithm estimation.},
|
||||||
|
langid = {french},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T12:29:43.098Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/GIUNC4L3/Brault et al. - Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{channarondClassificationEstimationStochastic2012,
|
||||||
|
title = {Classification and Estimation in the {{Stochastic Blockmodel}} Based on the Empirical Degrees},
|
||||||
|
author = {Channarond, Antoine and Daudin, Jean-Jacques and Robin, Stéphane},
|
||||||
|
date = {2012-01-01},
|
||||||
|
journaltitle = {Electronic Journal of Statistics},
|
||||||
|
shortjournal = {Electron. J. Statist.},
|
||||||
|
volume = {6},
|
||||||
|
publisher = {Institute of Mathematical Statistics},
|
||||||
|
issn = {1935-7524},
|
||||||
|
doi = {10.1214/12-ejs753},
|
||||||
|
url = {https://projecteuclid.org/journals/electronic-journal-of-statistics/volume-6/issue-none/Classification-and-estimation-in-the-Stochastic-Blockmodel-based-on-the/10.1214/12-EJS753.full},
|
||||||
|
urldate = {2025-07-09},
|
||||||
|
abstract = {The Stochastic Blockmodel [16] is a mixture model for heterogeneous network data. Unlike the usual statistical framework, new nodes give additional information about the previous ones in this model. Thereby the distribution of the degrees concentrates in points conditionally on the node class. We show under a mild assumption that classification, estimation and model selection can actually be achieved with no more than the empirical degree data. We provide an algorithm able to process very large networks and consistent estimators based on it. In particular, we prove a bound of the probability of misclassification of at least one node, including when the number of classes grows.},
|
||||||
|
issue = {none},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T13:59:33.921Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/8TL8AJ2G/Channarond et al. - 2012 - Classification and estimation in the Stochastic Blockmodel based on the empirical degrees.pdf}
|
||||||
|
}
|
||||||
158
suivi/2025-33/2025-33.qmd
Normal file
|
|
@ -0,0 +1,158 @@
|
||||||
|
---
|
||||||
|
title: "Bilan semaine 33 2025 : 11 août - 15 août"
|
||||||
|
categories: [colBiSBM, inférence, GNN]
|
||||||
|
date: 2025-08-14
|
||||||
|
date-modified: last-modified
|
||||||
|
bibliography: references.bib
|
||||||
|
---
|
||||||
|
|
||||||
|
## TODO List
|
||||||
|
|
||||||
|
- Pour clustering de collections sur données ~~réelles~~ :
|
||||||
|
→ L'intuition de Pierre semble être confirmée, les dissimilarités semblent arrêter de varier sensiblement pour de grandes valeurs $(Q_1,Q_2)$.
|
||||||
|
- ❓Je n'arrive plus à reproduire le bug pour l'inférence...
|
||||||
|
- 😫 bug encore. S'assurer que ça marche et relancer
|
||||||
|
|
||||||
|
- ⌛ **En fait je donne tous les degrés donc le GNN a juste à retrouver les arêtes non vues**. Revérifier que j'entraîne correctement le VGAE car résultats de généralisation trop bons sur les autres réseaux Doré, ce qui est étonnant
|
||||||
|
Pour corriger cet effet :
|
||||||
|
- Donner la matrice identité comme features
|
||||||
|
- Corriger les degrés calculés.
|
||||||
|
- ✅ Ajouter au tableau comparatif sep BiSBM
|
||||||
|
|
||||||
|
- ✅ Pour s'assurer que colBiSBM marche, il faut comparer avec une proportion de :
|
||||||
|
- *Missing links*, ie des faux zéros
|
||||||
|
- *NA* en *Missing at random (MAR)*
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
- Faibles performances de l'inférence :
|
||||||
|
- Vérifier que les conditions d'identifiabilité des modèles fautifs sont bien remplies.
|
||||||
|
- Récupérer des jeux de paramètres et essayer de reproduire les résultats.
|
||||||
|
|
||||||
|
- Clustering sur Doré :
|
||||||
|
- ✅ Désagréger les réseaux et relancer le clustering sur certains auteurs.
|
||||||
|
- Regarder pour les couples date+nom les études et le nombre de réseaux analysables (Possible demander à Élisa)
|
||||||
|
- ⌛ Chamberlain et al semble intéressant à regarder !
|
||||||
|
- Clusteriser sur la base des noms et voir parmi les réseaux Européens (désagrégés ?)
|
||||||
|
- Si M > 10, alors voir si je retrouve les mêmes résultats que dans les études.
|
||||||
|
|
||||||
|
- Regarder les codes Mangal database pour $\delta$
|
||||||
|
- Voir $\delta$ mais additif
|
||||||
|
|
||||||
|
:::{.callout-note}
|
||||||
|
### $\delta$ additif Bernoulli
|
||||||
|
En Bernoulli pas de forme analytique non plus :
|
||||||
|
Pour $\alpha_{qr}$:
|
||||||
|
$$ \sum_{m=1}^M \sum_{i=1}^{n_1^m} \sum_{j=1}^{n_2^m} \tau_{iq}^{1,m}\tau_{jr}^{2,m}(\frac{X_{ij}^m}{\alpha_{qr}} + \frac{(1-X_{ij}^m)}{\alpha_{qr} + \delta_m -1}) = 0$$
|
||||||
|
$$\Leftrightarrow \sum_m \frac{e^m_{qr}}{\alpha_{qr}} + \frac{1}{\alpha_{qr}+\delta_m-1} (n^m_{qr}-e^m_{qr}) = 0$$
|
||||||
|
|
||||||
|
Et pour $\delta_m$:
|
||||||
|
$$ \sum_{i=1}^{n_1^m} \sum_{j=1}^{n_2^m} \sum_{q=1}^{Q_1} \sum_{r=1}^{Q_2} \tau_{iq}^{1,m}\tau_{jr}^{2,m}(\frac{X_{ij}^m}{\delta_{m}} + \frac{(1-X_{ij}^m)}{\alpha_{qr} + \delta_m -1}) = 0$$
|
||||||
|
:::
|
||||||
|
|
||||||
|
:::{.callout-note}
|
||||||
|
### $\delta$ additif Poisson
|
||||||
|
Forme analytique mais risque de confusion ?
|
||||||
|
$$\widehat{\delta_m} = \frac{\sum_{q,r} e^m_{qr}}{\sum_{q,r} n^m_{qr}},~\widehat{\alpha_{qr}} = \frac{\sum_{m} e^m_{qr}}{\sum_{m} n^m_{qr}} $$
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
|
- Regarder la liste des cours du MathSV et de l'Université Paris-Saclay.
|
||||||
|
|
||||||
|
- ⌛ **Plutôt regarder pour introduire un modèle $\delta$-colBiSBM**.
|
||||||
|
- Ajouter le produit par $\delta$ là où nécessaire
|
||||||
|
- Ajouter les modèles $\delta$, $\delta\pi, \dots$ et les blocs conditionnels
|
||||||
|
- Ajouter les tests unitaires adéquats et les vérifier
|
||||||
|
|
||||||
|
- Regarder *Largest gap* sur réseaux Doré
|
||||||
|
|
||||||
|
- Essayer *clustering* sur `supinfo`
|
||||||
|
|
||||||
|
- Homogénéiser notations dans les supplementaries
|
||||||
|
|
||||||
|
:::{#ref-kmeans-vae}
|
||||||
|
|
||||||
|
- Faire GNN-VAE Doré et sub-Doré avec kmeans et clustering sur l'espace latent
|
||||||
|
J'ai commencé à regarder un peu
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
|
|
||||||
|
### Inférence et microbes
|
||||||
|
|
||||||
|
- ✅ Ouvert les données Compendium Europe avec easy16s, premières remarques : en dessous de famille peu d'information
|
||||||
|
- easy16s : se renseigner sur
|
||||||
|
- $\alpha$, $\beta$ diversité
|
||||||
|
- Heatmap
|
||||||
|
- Lancer *colBiSBM* sur $OTU\times Sample$ → problème du chargement en mémoire des données à voir
|
||||||
|
- Lancer *colSBM* sur $OTU\times OTU$
|
||||||
|
- TabNet pratiquer les [exercices](https://github.com/cregouby/Tutoriel_torch)
|
||||||
|
- Regarder **SPARTA** Rennes
|
||||||
|
- Lire Papiers compositional data (Aitchison et al. intro)
|
||||||
|
- Lire article multi-niveaux Saint-Clair
|
||||||
|
- Ecrire et étudier les modèles pour différents niveaux taxonomiques.
|
||||||
|
\begin{align*}
|
||||||
|
i \rightarrow &~N^1_i \subseteq N^2_i \subseteq N^3_i & \text{Taxonomie}\\
|
||||||
|
Z^0_i \overset{?}{=} & Z^1_i \overset{?}{=} Z^2_i \overset{?}{=} Z^3_i & \text{Groupes fonctionnels}
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
#### Causalité
|
||||||
|
|
||||||
|
Plus sur le temps long, à regarder
|
||||||
|
|
||||||
|
- GT causalité
|
||||||
|
- Daria Bystrova lire présentation @bystrovaCausalDiscovery (Meek rules, V-structure)
|
||||||
|
|
||||||
|
## Biblio à faire
|
||||||
|
|
||||||
|
- Regarder Transport optimal graphes bipartite.
|
||||||
|
|
||||||
|
|
||||||
|
## Lectures en cours 📚
|
||||||
|
|
||||||
|
### HDR Vincent Brault
|
||||||
|
|
||||||
|
- ✅ Intro : Présentation de toutes les recherches, très diversifiée et de l'application aux propriétés théoriques en passant par des codes efficients. Creuser le lien entre *les modèles à var latentes et le transport optimal*. Le chap 4 a l'air intéressant notamment le **mélange de modèles de segmentation**.
|
||||||
|
- ⌛ Chap 2 : Creuser l'idée de maximiser l'énergie libre, très intéressant regarder le critère CARI et lire Robert et al 2021. Actuellement p32 du manuscrit
|
||||||
|
- Chap 3
|
||||||
|
|
||||||
|
### OT
|
||||||
|
- ⌛ @mazeletUnsupervisedLearningOptimal Intéressant pour le transport optimal entre graphes de tailles différentes | Regarder si régularisation entropique ne marche pas bien pour le graphe.
|
||||||
|
- ⌛ @nennaLecture2Entropic Pour comprendre le problème d'OT régularisé pour l'entropie.
|
||||||
|
- ⌛ @nennaLecture1Monge
|
||||||
|
|
||||||
|
### Inférence de graphes
|
||||||
|
|
||||||
|
- ⌛ @aitchisonStatisticalAnalysisCompositional1982a, en cours
|
||||||
|
|
||||||
|
- ❗📖 @payneFiniteMixturesMultivariate2023 sur MixMPLN
|
||||||
|
|
||||||
|
### Causalité
|
||||||
|
|
||||||
|
- ❗📖 @bystrovaCausalDiscovery
|
||||||
|
|
||||||
|
### Largest Gaps
|
||||||
|
|
||||||
|
- ❗📖 @braultFastConsistentAlgorithm2023
|
||||||
|
- ❗📖 @channarondClassificationEstimationStochastic2012 le papier qui introduit le *Largest Gaps*
|
||||||
|
|
||||||
|
## A discuter
|
||||||
|
|
||||||
|
### Congés P&S
|
||||||
|
|
||||||
|
### Thèse
|
||||||
|
|
||||||
|
- Faire préz CSI
|
||||||
|
- Faire rapport CSI
|
||||||
|
|
||||||
|
### Interprétation écologique des résultats de Baldock
|
||||||
|
|
||||||
|
- ⌛ Point avec Elisa, **oui on relance**
|
||||||
|
|
||||||
|
### Inférence
|
||||||
|
|
||||||
|
- pbs : variance, bcp de zero, covariables, offset et taxonomie (Reseaux arretes differents niveaux : Genre, OTU ...)
|
||||||
|
|
||||||
|
> Combine networks at different taxonomic levels
|
||||||
|
|
||||||
|
- Inférence + GREMLINS
|
||||||
BIN
suivi/2025-33/figs/auc-model.png
Normal file
|
After Width: | Height: | Size: 38 KiB |
144
suivi/2025-33/references.bib
Normal file
|
|
@ -0,0 +1,144 @@
|
||||||
|
@article{mazeletUnsupervisedLearningOptimal,
|
||||||
|
title = {Unsupervised {{Learning}} for {{Optimal Transport}} Plan Prediction between Unbalanced Graphs},
|
||||||
|
author = {Mazelet, Sonia and Flamary, Rémi and Thirion, Bertrand},
|
||||||
|
abstract = {Optimal transport between graphs, based on Gromov-Wasserstein and other extensions, is a powerful tool for comparing and aligning graph structures. However, solving the associated non-convex optimization problems is computationally expensive, which limits the scalability of these methods to large graphs. In this work, we present Unbalanced Learning of Optimal Transport (ULOT), a deep learning method that predicts optimal transport plans between two graphs. Our method is trained by minimizing the fused unbalanced Gromov-Wasserstein (FUGW) loss. We propose a novel neural architecture with cross-attention that is conditioned on the FUGW tradeoff hyperparameters. We evaluate ULOT on synthetic stochastic block model (SBM) graphs and on real cortical surface data obtained from fMRI. ULOT predicts transport plans with competitive loss up to two orders of magnitude faster than classical solvers. Furthermore, the predicted plan can be used as a warm start for classical solvers to accelerate their convergence. Finally, the predicted transport plan is fully differentiable with respect to the graph inputs and FUGW hyperparameters, enabling the optimization of functionals of the ULOT plan.},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-11T09:08:09.864Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/HPZEYMM9/Mazelet et al. - Unsupervised Learning for Optimal Transport plan prediction between unbalanced graphs.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{nennaLecture2Entropic,
|
||||||
|
title = {Lecture 2: {{Entropic Optimal Transport}}},
|
||||||
|
author = {Nenna, Luca},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-11T16:06:28.547Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/WGFIISDB/Nenna - Lecture 2 Entropic Optimal Transport.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{nennaLecture1Monge,
|
||||||
|
title = {Lecture 1 {{Monge}} and {{Kantorovich}} Problems: From Primal to Dual},
|
||||||
|
author = {Nenna, Luca},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-13T09:24:13.832Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/7LVQPD6D/Nenna - Lecture 1 Monge and Kantorovich problems from primal to dual.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{Morton2021.11.09.467939,
|
||||||
|
title = {Scalable Estimation of Microbial Co-Occurrence Networks with {{Variational Autoencoders}}},
|
||||||
|
author = {Morton, James T. and Silverman, Justin and Tikhonov, Gleb and Lähdesmäki, Harri and Bonneau, Rich},
|
||||||
|
date = {2021},
|
||||||
|
journaltitle = {bioRxiv : the preprint server for biology},
|
||||||
|
shortjournal = {bioRxiv},
|
||||||
|
eprint = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939.full.pdf},
|
||||||
|
publisher = {Cold Spring Harbor Laboratory},
|
||||||
|
doi = {10.1101/2021.11.09.467939},
|
||||||
|
url = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939},
|
||||||
|
abstract = {Estimating microbe-microbe interactions is critical for understanding the ecological laws governing microbial communities. Rapidly decreasing sequencing costs have promised new opportunities to estimate microbe-microbe interactions across thousands of uncultured, unknown microbes. However, typical microbiome datasets are very high dimensional and accurate estimation of microbial correlations requires tens of thousands of samples, exceeding the computational capabilities of existing methodologies. Furthermore, the vast majority of microbiome studies collect compositional metagenomics data which enforces a negative bias when computing microbe-microbe correlations. The Multinomial Logistic Normal (MLN) distribution has been shown to be effective at inferring microbe-microbe correlations, however scalable Bayesian inference of these distributions has remained elusive. Here, we show that carefully constructed Variational Autoencoders (VAEs) augmented with the Isometric Log-ratio (ILR) transform can estimate low-rank MLN distributions thousands of times faster than existing methods. These VAEs can be trained on tens of thousands of samples, enabling co-occurrence inference across tens of thousands of microbes without regularization. The latent embedding distances computed from these VAEs are competitive with existing beta-diversity methods across a variety of mouse and human microbiome classification and regression tasks, with notable improvements on longitudinal studies.Competing Interest StatementThe authors have declared no competing interest.},
|
||||||
|
elocation-id = {2021.11.09.467939},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-30T14:17:29.518Z}
|
||||||
|
}
|
||||||
|
@article{aitchisonStatisticalAnalysisCompositional1982a,
|
||||||
|
title = {The {{Statistical Analysis}} of {{Compositional Data}}},
|
||||||
|
author = {Aitchison, J.},
|
||||||
|
date = {1982},
|
||||||
|
journaltitle = {Journal of the Royal Statistical Society. Series B (Methodological)},
|
||||||
|
volume = {44},
|
||||||
|
number = {2},
|
||||||
|
eprint = {2345821},
|
||||||
|
eprinttype = {jstor},
|
||||||
|
pages = {139--177},
|
||||||
|
publisher = {[Royal Statistical Society, Oxford University Press]},
|
||||||
|
issn = {0035-9246},
|
||||||
|
url = {https://www.jstor.org/stable/2345821},
|
||||||
|
urldate = {2025-05-07},
|
||||||
|
abstract = {The simplex plays an important role as sample space in many practical situations where compositional data, in the form of proportions of some whole, require interpretation. It is argued that the statistical analysis of such data has proved difficult because of a lack both of concepts of independence and of rich enough parametric classes of distributions in the simplex. A variety of independence hypotheses are introduced and interrelated, and new classes of transformed-normal distributions in the simplex are provided as models within which the independence hypotheses can be tested through standard theory of parametric hypothesis testing. The new concepts and statistical methodology are illustrated by a number of applications.},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-05-07T07:43:38.485Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/S97URH4Y/Aitchison - 1982 - The Statistical Analysis of Compositional Data.pdf}
|
||||||
|
}
|
||||||
|
@online{payneFiniteMixturesMultivariate2023,
|
||||||
|
title = {Finite {{Mixtures}} of {{Multivariate Poisson-Log Normal Factor Analyzers}} for {{Clustering Count Data}}},
|
||||||
|
author = {Payne, Andrea and Silva, Anjali and Rothstein, Steven J. and McNicholas, Paul D. and Subedi, Sanjeena},
|
||||||
|
date = {2023-11-13},
|
||||||
|
eprint = {2311.07762},
|
||||||
|
eprinttype = {arXiv},
|
||||||
|
eprintclass = {stat},
|
||||||
|
doi = {10.48550/arXiv.2311.07762},
|
||||||
|
url = {http://arxiv.org/abs/2311.07762},
|
||||||
|
urldate = {2025-07-02},
|
||||||
|
abstract = {A mixture of multivariate Poisson-log normal factor analyzers is introduced by imposing constraints on the covariance matrix, which resulted in flexible models for clustering purposes. In particular, a class of eight parsimonious mixture models based on the mixtures of factor analyzers model are introduced. Variational Gaussian approximation is used for parameter estimation, and information criteria are used for model selection. The proposed models are explored in the context of clustering discrete data arising from RNA sequencing studies. Using real and simulated data, the models are shown to give favourable clustering performance. The GitHub R package for this work is available at https://github.com/anjalisilva/mixMPLNFA and is released under the open-source MIT license.},
|
||||||
|
pubstate = {prepublished},
|
||||||
|
keywords = {/unread,Statistics - Computation,Statistics - Machine Learning,Statistics - Methodology},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-02T09:31:47.579Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/BXVPEIDD/Payne et al. - 2023 - Finite Mixtures of Multivariate Poisson-Log Normal Factor Analyzers for Clustering Count Data.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/L5DAS5C2/2311.html}
|
||||||
|
}
|
||||||
|
@unpublished{bystrovaCausalDiscovery,
|
||||||
|
title = {Causal Discovery},
|
||||||
|
author = {Bystrova, Daria},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-02T09:34:39.476Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/NQE5DY92/Bystrova - Causal discovery.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@online{braultFastConsistentAlgorithm2023,
|
||||||
|
title = {Fast and {{Consistent Algorithm}} for the {{Latent Block Model}}},
|
||||||
|
author = {Brault, Vincent and Channarond, Antoine},
|
||||||
|
date = {2023-03-09},
|
||||||
|
eprint = {1610.09005},
|
||||||
|
eprinttype = {arXiv},
|
||||||
|
eprintclass = {math},
|
||||||
|
doi = {10.48550/arXiv.1610.09005},
|
||||||
|
url = {http://arxiv.org/abs/1610.09005},
|
||||||
|
urldate = {2025-07-09},
|
||||||
|
abstract = {The latent block model is used to simultaneously rank the rows and columns of a matrix to reveal a block structure. The algorithms used for estimation are often time consuming. However, recent work shows that the log-likelihood ratios are equivalent under the complete and observed (with unknown labels) models and the groups posterior distribution to converge as the size of the data increases to a Dirac mass located at the actual groups configuration. Based on these observations, the algorithm Largest Gaps is proposed in this paper to perform clustering using only the marginals of the matrix, when the number of blocks is very small with respect to the size of the whole matrix in the case of binary data. In addition, a model selection method is incorporated with a proof of its consistency. Thus, this paper shows that studying simplistic configurations (few blocks compared to the size of the matrix or very contrasting blocks) with complex algorithms is useless since the marginals already give very good parameter and classification estimates.},
|
||||||
|
langid = {english},
|
||||||
|
pubstate = {prepublished},
|
||||||
|
keywords = {/unread,Mathematics - Statistics Theory,Statistics - Computation,Statistics - Statistics Theory},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T13:58:53.533Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/5LXC6Y68/Brault et Channarond - 2023 - Fast and Consistent Algorithm for the Latent Block Model.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{braultGeneralisationLalgorithmeLargest,
|
||||||
|
title = {Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique},
|
||||||
|
author = {Brault, Vincent and Channarond, Antoine and Robert, Valérie},
|
||||||
|
abstract = {The latent block model assumes there exists a distribution for each crossing between an object cluster and a variable cluster of a data table ; the cells are supposed to be independent conditionally to the choice of these clusters. To estimate the model parameters, most of algorithms are time consuming. Brault and Channarond (2016) proposed to adapt the Largest Gaps algorithm which consists in using the margins. They thus obtained a procedure which estimates all the model parameters consistently but requires a large number of observations. In this talk, we will extend the procedure to the case of any distribution having a second order moment by using an EM algorithm estimation.},
|
||||||
|
langid = {french},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T12:29:43.098Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/GIUNC4L3/Brault et al. - Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{channarondClassificationEstimationStochastic2012,
|
||||||
|
title = {Classification and Estimation in the {{Stochastic Blockmodel}} Based on the Empirical Degrees},
|
||||||
|
author = {Channarond, Antoine and Daudin, Jean-Jacques and Robin, Stéphane},
|
||||||
|
date = {2012-01-01},
|
||||||
|
journaltitle = {Electronic Journal of Statistics},
|
||||||
|
shortjournal = {Electron. J. Statist.},
|
||||||
|
volume = {6},
|
||||||
|
publisher = {Institute of Mathematical Statistics},
|
||||||
|
issn = {1935-7524},
|
||||||
|
doi = {10.1214/12-ejs753},
|
||||||
|
url = {https://projecteuclid.org/journals/electronic-journal-of-statistics/volume-6/issue-none/Classification-and-estimation-in-the-Stochastic-Blockmodel-based-on-the/10.1214/12-EJS753.full},
|
||||||
|
urldate = {2025-07-09},
|
||||||
|
abstract = {The Stochastic Blockmodel [16] is a mixture model for heterogeneous network data. Unlike the usual statistical framework, new nodes give additional information about the previous ones in this model. Thereby the distribution of the degrees concentrates in points conditionally on the node class. We show under a mild assumption that classification, estimation and model selection can actually be achieved with no more than the empirical degree data. We provide an algorithm able to process very large networks and consistent estimators based on it. In particular, we prove a bound of the probability of misclassification of at least one node, including when the number of classes grows.},
|
||||||
|
issue = {none},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T13:59:33.921Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/8TL8AJ2G/Channarond et al. - 2012 - Classification and estimation in the Stochastic Blockmodel based on the empirical degrees.pdf}
|
||||||
|
}
|
||||||
147
suivi/2025-35/2025-35.qmd
Normal file
|
|
@ -0,0 +1,147 @@
|
||||||
|
---
|
||||||
|
title: "Bilan semaine 35 2025 : 25 août - 29 août"
|
||||||
|
categories: [colBiSBM, inférence, GNN]
|
||||||
|
date: 2025-08-29
|
||||||
|
date-modified: last-modified
|
||||||
|
bibliography: references.bib
|
||||||
|
---
|
||||||
|
|
||||||
|
## TODO List
|
||||||
|
|
||||||
|
- Pour clustering de collections sur données ~~réelles~~ :
|
||||||
|
→ L'intuition de Pierre semble être confirmée, les dissimilarités semblent arrêter de varier sensiblement pour de grandes valeurs $(Q_1,Q_2)$.
|
||||||
|
- ❓Je n'arrive plus à reproduire le bug pour l'inférence...
|
||||||
|
- 😫 bug encore. S'assurer que ça marche et relancer
|
||||||
|
|
||||||
|
- ⌛ A Roscoff avec Julie et Pierre nous avons constaté que c'était l'extraction des dyades pour le calcul des métriques qui était incorrecte. Maintenant c'est corrigé et ça fonctionne !
|
||||||
|
En fait je donne tous les degrés donc le GNN a juste à retrouver les arêtes non vues. Revérifier que j'entraîne correctement le VGAE car résultats de généralisation trop bons sur les autres réseaux Doré, ce qui est étonnant
|
||||||
|
Pour corriger cet effet :
|
||||||
|
- Donner la matrice identité comme features
|
||||||
|
- Corriger les degrés calculés.
|
||||||
|
|
||||||
|
- Faibles performances de l'inférence :
|
||||||
|
- Vérifier que les conditions d'identifiabilité des modèles fautifs sont bien remplies.
|
||||||
|
- Récupérer des jeux de paramètres et essayer de reproduire les résultats.
|
||||||
|
|
||||||
|
- Clustering sur Doré :
|
||||||
|
- Regarder pour les couples date+nom les études et le nombre de réseaux analysables (Possible demander à Élisa)
|
||||||
|
- ⌛ Chamberlain et al semble intéressant à regarder ! Voir le Rmarkdown
|
||||||
|
- Clusteriser sur la base des noms et voir parmi les réseaux Européens (désagrégés ?)
|
||||||
|
- Si M > 10, alors voir si je retrouve les mêmes résultats que dans les études.
|
||||||
|
|
||||||
|
- Regarder les codes Mangal database pour $\delta$
|
||||||
|
- Voir $\delta$ mais additif
|
||||||
|
|
||||||
|
:::{.callout-note}
|
||||||
|
### $\delta$ additif Bernoulli
|
||||||
|
En Bernoulli pas de forme analytique non plus :
|
||||||
|
Pour $\alpha_{qr}$:
|
||||||
|
$$ \sum_{m=1}^M \sum_{i=1}^{n_1^m} \sum_{j=1}^{n_2^m} \tau_{iq}^{1,m}\tau_{jr}^{2,m}(\frac{X_{ij}^m}{\alpha_{qr}} + \frac{(1-X_{ij}^m)}{\alpha_{qr} + \delta_m -1}) = 0$$
|
||||||
|
$$\Leftrightarrow \sum_m \frac{e^m_{qr}}{\alpha_{qr}} + \frac{1}{\alpha_{qr}+\delta_m-1} (n^m_{qr}-e^m_{qr}) = 0$$
|
||||||
|
|
||||||
|
Et pour $\delta_m$:
|
||||||
|
$$ \sum_{i=1}^{n_1^m} \sum_{j=1}^{n_2^m} \sum_{q=1}^{Q_1} \sum_{r=1}^{Q_2} \tau_{iq}^{1,m}\tau_{jr}^{2,m}(\frac{X_{ij}^m}{\delta_{m}} + \frac{(1-X_{ij}^m)}{\alpha_{qr} + \delta_m -1}) = 0$$
|
||||||
|
:::
|
||||||
|
|
||||||
|
:::{.callout-note}
|
||||||
|
### $\delta$ additif Poisson
|
||||||
|
Forme analytique mais risque de confusion ?
|
||||||
|
$$\widehat{\delta_m} = \frac{\sum_{q,r} e^m_{qr}}{\sum_{q,r} n^m_{qr}},~\widehat{\alpha_{qr}} = \frac{\sum_{m} e^m_{qr}}{\sum_{m} n^m_{qr}} $$
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
|
- Regarder la liste des cours du MathSV et de l'Université Paris-Saclay.
|
||||||
|
|
||||||
|
- ⌛ **Plutôt regarder pour introduire un modèle $\delta$-colBiSBM**.
|
||||||
|
- Ajouter le produit par $\delta$ là où nécessaire
|
||||||
|
- Ajouter les modèles $\delta$, $\delta\pi, \dots$ et les blocs conditionnels
|
||||||
|
- Ajouter les tests unitaires adéquats et les vérifier
|
||||||
|
|
||||||
|
- Regarder *Largest gap* sur réseaux Doré
|
||||||
|
|
||||||
|
- Essayer *clustering* sur `supinfo`
|
||||||
|
|
||||||
|
- Homogénéiser notations dans les supplementaries
|
||||||
|
|
||||||
|
:::{#ref-kmeans-vae}
|
||||||
|
|
||||||
|
- Faire GNN-VAE Doré et sub-Doré avec kmeans et clustering sur l'espace latent
|
||||||
|
J'ai commencé à regarder un peu
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
|
|
||||||
|
### Inférence et microbes
|
||||||
|
|
||||||
|
- easy16s : se renseigner sur
|
||||||
|
- $\alpha$, $\beta$ diversité
|
||||||
|
- Heatmap
|
||||||
|
- Voir avec Mahendra à l'occasion du CSI
|
||||||
|
- Lancer *colBiSBM* sur $OTU\times Sample$ → problème du chargement en mémoire des données à voir
|
||||||
|
- Lancer *colSBM* sur $OTU\times OTU$
|
||||||
|
- TabNet pratiquer les [exercices](https://github.com/cregouby/Tutoriel_torch)
|
||||||
|
- Regarder **SPARTA** Rennes
|
||||||
|
- Lire Papiers compositional data (Aitchison et al. intro)
|
||||||
|
- Lire article multi-niveaux Saint-Clair
|
||||||
|
- Ecrire et étudier les modèles pour différents niveaux taxonomiques.
|
||||||
|
\begin{align*}
|
||||||
|
i \rightarrow &~N^1_i \subseteq N^2_i \subseteq N^3_i & \text{Taxonomie}\\
|
||||||
|
Z^0_i \overset{?}{=} & Z^1_i \overset{?}{=} Z^2_i \overset{?}{=} Z^3_i & \text{Groupes fonctionnels}
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
#### Causalité
|
||||||
|
|
||||||
|
Plus sur le temps long, à regarder
|
||||||
|
|
||||||
|
- GT causalité
|
||||||
|
- Daria Bystrova lire présentation @bystrovaCausalDiscovery (Meek rules, V-structure)
|
||||||
|
|
||||||
|
## Biblio à faire
|
||||||
|
|
||||||
|
- Regarder Transport optimal graphes bipartis.
|
||||||
|
|
||||||
|
|
||||||
|
## Lectures en cours 📚
|
||||||
|
|
||||||
|
### HDR Vincent Brault
|
||||||
|
|
||||||
|
- ⌛ Chap 2 : Creuser l'idée de maximiser l'énergie libre, très intéressant regarder le critère CARI et lire Robert et al 2021. Actuellement p32 du manuscrit
|
||||||
|
- Chap 3
|
||||||
|
|
||||||
|
### OT
|
||||||
|
- ⌛ @mazeletUnsupervisedLearningOptimal Intéressant pour le transport optimal entre graphes de tailles différentes | Regarder si régularisation entropique ne marche pas bien pour le graphe.
|
||||||
|
- ⌛ @nennaLecture2Entropic Pour comprendre le problème d'OT régularisé pour l'entropie.
|
||||||
|
- ⌛ @nennaLecture1Monge
|
||||||
|
|
||||||
|
### Inférence de graphes
|
||||||
|
|
||||||
|
- ⌛ @aitchisonStatisticalAnalysisCompositional1982a, en cours
|
||||||
|
|
||||||
|
- ❗📖 @payneFiniteMixturesMultivariate2023 sur MixMPLN
|
||||||
|
|
||||||
|
### Causalité
|
||||||
|
|
||||||
|
- ❗📖 @bystrovaCausalDiscovery
|
||||||
|
|
||||||
|
### Largest Gaps
|
||||||
|
|
||||||
|
- ❗📖 @braultFastConsistentAlgorithm2023
|
||||||
|
- ❗📖 @channarondClassificationEstimationStochastic2012 le papier qui introduit le *Largest Gaps*
|
||||||
|
|
||||||
|
## A discuter
|
||||||
|
|
||||||
|
### Congés P&S
|
||||||
|
|
||||||
|
### Thèse
|
||||||
|
|
||||||
|
- Faire préz CSI
|
||||||
|
- Faire rapport CSI
|
||||||
|
|
||||||
|
|
||||||
|
### Inférence
|
||||||
|
|
||||||
|
- pbs : variance, bcp de zero, covariables, offset et taxonomie (Reseaux arretes differents niveaux : Genre, OTU ...)
|
||||||
|
|
||||||
|
> Combine networks at different taxonomic levels
|
||||||
|
|
||||||
|
- Inférence + GREMLINS
|
||||||
144
suivi/2025-35/references.bib
Normal file
|
|
@ -0,0 +1,144 @@
|
||||||
|
@article{mazeletUnsupervisedLearningOptimal,
|
||||||
|
title = {Unsupervised {{Learning}} for {{Optimal Transport}} Plan Prediction between Unbalanced Graphs},
|
||||||
|
author = {Mazelet, Sonia and Flamary, Rémi and Thirion, Bertrand},
|
||||||
|
abstract = {Optimal transport between graphs, based on Gromov-Wasserstein and other extensions, is a powerful tool for comparing and aligning graph structures. However, solving the associated non-convex optimization problems is computationally expensive, which limits the scalability of these methods to large graphs. In this work, we present Unbalanced Learning of Optimal Transport (ULOT), a deep learning method that predicts optimal transport plans between two graphs. Our method is trained by minimizing the fused unbalanced Gromov-Wasserstein (FUGW) loss. We propose a novel neural architecture with cross-attention that is conditioned on the FUGW tradeoff hyperparameters. We evaluate ULOT on synthetic stochastic block model (SBM) graphs and on real cortical surface data obtained from fMRI. ULOT predicts transport plans with competitive loss up to two orders of magnitude faster than classical solvers. Furthermore, the predicted plan can be used as a warm start for classical solvers to accelerate their convergence. Finally, the predicted transport plan is fully differentiable with respect to the graph inputs and FUGW hyperparameters, enabling the optimization of functionals of the ULOT plan.},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-11T09:08:09.864Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/HPZEYMM9/Mazelet et al. - Unsupervised Learning for Optimal Transport plan prediction between unbalanced graphs.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{nennaLecture2Entropic,
|
||||||
|
title = {Lecture 2: {{Entropic Optimal Transport}}},
|
||||||
|
author = {Nenna, Luca},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-11T16:06:28.547Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/WGFIISDB/Nenna - Lecture 2 Entropic Optimal Transport.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{nennaLecture1Monge,
|
||||||
|
title = {Lecture 1 {{Monge}} and {{Kantorovich}} Problems: From Primal to Dual},
|
||||||
|
author = {Nenna, Luca},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-13T09:24:13.832Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/7LVQPD6D/Nenna - Lecture 1 Monge and Kantorovich problems from primal to dual.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{Morton2021.11.09.467939,
|
||||||
|
title = {Scalable Estimation of Microbial Co-Occurrence Networks with {{Variational Autoencoders}}},
|
||||||
|
author = {Morton, James T. and Silverman, Justin and Tikhonov, Gleb and Lähdesmäki, Harri and Bonneau, Rich},
|
||||||
|
date = {2021},
|
||||||
|
journaltitle = {bioRxiv : the preprint server for biology},
|
||||||
|
shortjournal = {bioRxiv},
|
||||||
|
eprint = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939.full.pdf},
|
||||||
|
publisher = {Cold Spring Harbor Laboratory},
|
||||||
|
doi = {10.1101/2021.11.09.467939},
|
||||||
|
url = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939},
|
||||||
|
abstract = {Estimating microbe-microbe interactions is critical for understanding the ecological laws governing microbial communities. Rapidly decreasing sequencing costs have promised new opportunities to estimate microbe-microbe interactions across thousands of uncultured, unknown microbes. However, typical microbiome datasets are very high dimensional and accurate estimation of microbial correlations requires tens of thousands of samples, exceeding the computational capabilities of existing methodologies. Furthermore, the vast majority of microbiome studies collect compositional metagenomics data which enforces a negative bias when computing microbe-microbe correlations. The Multinomial Logistic Normal (MLN) distribution has been shown to be effective at inferring microbe-microbe correlations, however scalable Bayesian inference of these distributions has remained elusive. Here, we show that carefully constructed Variational Autoencoders (VAEs) augmented with the Isometric Log-ratio (ILR) transform can estimate low-rank MLN distributions thousands of times faster than existing methods. These VAEs can be trained on tens of thousands of samples, enabling co-occurrence inference across tens of thousands of microbes without regularization. The latent embedding distances computed from these VAEs are competitive with existing beta-diversity methods across a variety of mouse and human microbiome classification and regression tasks, with notable improvements on longitudinal studies. Competing Interest Statement: The authors have declared no competing interest.},
|
||||||
|
elocation-id = {2021.11.09.467939},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-30T14:17:29.518Z}
|
||||||
|
}
|
||||||
|
@article{aitchisonStatisticalAnalysisCompositional1982a,
|
||||||
|
title = {The {{Statistical Analysis}} of {{Compositional Data}}},
|
||||||
|
author = {Aitchison, J.},
|
||||||
|
date = {1982},
|
||||||
|
journaltitle = {Journal of the Royal Statistical Society. Series B (Methodological)},
|
||||||
|
volume = {44},
|
||||||
|
number = {2},
|
||||||
|
eprint = {2345821},
|
||||||
|
eprinttype = {jstor},
|
||||||
|
pages = {139--177},
|
||||||
|
publisher = {[Royal Statistical Society, Oxford University Press]},
|
||||||
|
issn = {0035-9246},
|
||||||
|
url = {https://www.jstor.org/stable/2345821},
|
||||||
|
urldate = {2025-05-07},
|
||||||
|
abstract = {The simplex plays an important role as sample space in many practical situations where compositional data, in the form of proportions of some whole, require interpretation. It is argued that the statistical analysis of such data has proved difficult because of a lack both of concepts of independence and of rich enough parametric classes of distributions in the simplex. A variety of independence hypotheses are introduced and interrelated, and new classes of transformed-normal distributions in the simplex are provided as models within which the independence hypotheses can be tested through standard theory of parametric hypothesis testing. The new concepts and statistical methodology are illustrated by a number of applications.},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-05-07T07:43:38.485Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/S97URH4Y/Aitchison - 1982 - The Statistical Analysis of Compositional Data.pdf}
|
||||||
|
}
|
||||||
|
@online{payneFiniteMixturesMultivariate2023,
|
||||||
|
title = {Finite {{Mixtures}} of {{Multivariate Poisson-Log Normal Factor Analyzers}} for {{Clustering Count Data}}},
|
||||||
|
author = {Payne, Andrea and Silva, Anjali and Rothstein, Steven J. and McNicholas, Paul D. and Subedi, Sanjeena},
|
||||||
|
date = {2023-11-13},
|
||||||
|
eprint = {2311.07762},
|
||||||
|
eprinttype = {arXiv},
|
||||||
|
eprintclass = {stat},
|
||||||
|
doi = {10.48550/arXiv.2311.07762},
|
||||||
|
url = {http://arxiv.org/abs/2311.07762},
|
||||||
|
urldate = {2025-07-02},
|
||||||
|
abstract = {A mixture of multivariate Poisson-log normal factor analyzers is introduced by imposing constraints on the covariance matrix, which resulted in flexible models for clustering purposes. In particular, a class of eight parsimonious mixture models based on the mixtures of factor analyzers model are introduced. Variational Gaussian approximation is used for parameter estimation, and information criteria are used for model selection. The proposed models are explored in the context of clustering discrete data arising from RNA sequencing studies. Using real and simulated data, the models are shown to give favourable clustering performance. The GitHub R package for this work is available at https://github.com/anjalisilva/mixMPLNFA and is released under the open-source MIT license.},
|
||||||
|
pubstate = {prepublished},
|
||||||
|
keywords = {/unread,Statistics - Computation,Statistics - Machine Learning,Statistics - Methodology},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-02T09:31:47.579Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/BXVPEIDD/Payne et al. - 2023 - Finite Mixtures of Multivariate Poisson-Log Normal Factor Analyzers for Clustering Count Data.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/L5DAS5C2/2311.html}
|
||||||
|
}
|
||||||
|
@unpublished{bystrovaCausalDiscovery,
|
||||||
|
title = {Causal Discovery},
|
||||||
|
author = {Bystrova, Daria},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-02T09:34:39.476Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/NQE5DY92/Bystrova - Causal discovery.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@online{braultFastConsistentAlgorithm2023,
|
||||||
|
title = {Fast and {{Consistent Algorithm}} for the {{Latent Block Model}}},
|
||||||
|
author = {Brault, Vincent and Channarond, Antoine},
|
||||||
|
date = {2023-03-09},
|
||||||
|
eprint = {1610.09005},
|
||||||
|
eprinttype = {arXiv},
|
||||||
|
eprintclass = {math},
|
||||||
|
doi = {10.48550/arXiv.1610.09005},
|
||||||
|
url = {http://arxiv.org/abs/1610.09005},
|
||||||
|
urldate = {2025-07-09},
|
||||||
|
abstract = {The latent block model is used to simultaneously rank the rows and columns of a matrix to reveal a block structure. The algorithms used for estimation are often time consuming. However, recent work shows that the log-likelihood ratios are equivalent under the complete and observed (with unknown labels) models and the groups posterior distribution to converge as the size of the data increases to a Dirac mass located at the actual groups configuration. Based on these observations, the algorithm Largest Gaps is proposed in this paper to perform clustering using only the marginals of the matrix, when the number of blocks is very small with respect to the size of the whole matrix in the case of binary data. In addition, a model selection method is incorporated with a proof of its consistency. Thus, this paper shows that studying simplistic configurations (few blocks compared to the size of the matrix or very contrasting blocks) with complex algorithms is useless since the marginals already give very good parameter and classification estimates.},
|
||||||
|
langid = {english},
|
||||||
|
pubstate = {prepublished},
|
||||||
|
keywords = {/unread,Mathematics - Statistics Theory,Statistics - Computation,Statistics - Statistics Theory},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T13:58:53.533Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/5LXC6Y68/Brault et Channarond - 2023 - Fast and Consistent Algorithm for the Latent Block Model.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{braultGeneralisationLalgorithmeLargest,
|
||||||
|
title = {Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique},
|
||||||
|
author = {Brault, Vincent and Channarond, Antoine and Robert, Valérie},
|
||||||
|
abstract = {The latent block model assumes there exists a distribution for each crossing between an object cluster and a variable cluster of a data table ; the cells are supposed to be independent conditionally to the choice of these clusters. To estimate the model parameters, most of algorithms are time consuming. Brault and Channarond (2016) proposed to adapt the Largest Gaps algorithm which consists in using the margins. They thus obtained a procedure which estimates all the model parameters consistently but requires a large number of observations. In this talk, we will extend the procedure to the case of any distribution having a second order moment by using an EM algorithm estimation.},
|
||||||
|
langid = {french},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T12:29:43.098Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/GIUNC4L3/Brault et al. - Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{channarondClassificationEstimationStochastic2012,
|
||||||
|
title = {Classification and Estimation in the {{Stochastic Blockmodel}} Based on the Empirical Degrees},
|
||||||
|
author = {Channarond, Antoine and Daudin, Jean-Jacques and Robin, Stéphane},
|
||||||
|
date = {2012-01-01},
|
||||||
|
journaltitle = {Electronic Journal of Statistics},
|
||||||
|
shortjournal = {Electron. J. Statist.},
|
||||||
|
volume = {6},
|
||||||
|
publisher = {Institute of Mathematical Statistics},
|
||||||
|
issn = {1935-7524},
|
||||||
|
doi = {10.1214/12-ejs753},
|
||||||
|
url = {https://projecteuclid.org/journals/electronic-journal-of-statistics/volume-6/issue-none/Classification-and-estimation-in-the-Stochastic-Blockmodel-based-on-the/10.1214/12-EJS753.full},
|
||||||
|
urldate = {2025-07-09},
|
||||||
|
abstract = {The Stochastic Blockmodel [16] is a mixture model for heterogeneous network data. Unlike the usual statistical framework, new nodes give additional information about the previous ones in this model. Thereby the distribution of the degrees concentrates in points conditionally on the node class. We show under a mild assumption that classification, estimation and model selection can actually be achieved with no more than the empirical degree data. We provide an algorithm able to process very large networks and consistent estimators based on it. In particular, we prove a bound of the probability of misclassification of at least one node, including when the number of classes grows.},
|
||||||
|
issue = {none},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T13:59:33.921Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/8TL8AJ2G/Channarond et al. - 2012 - Classification and estimation in the Stochastic Blockmodel based on the empirical degrees.pdf}
|
||||||
|
}
|
||||||
144
suivi/2025-38/2025-38.qmd
Normal file
|
|
@ -0,0 +1,144 @@
|
||||||
|
---
|
||||||
|
title: "Bilan semaine 38 2025 : 15 septembre - 19 septembre"
|
||||||
|
categories: [colBiSBM, inférence, GNN]
|
||||||
|
date: 2025-09-19
|
||||||
|
date-modified: last-modified
|
||||||
|
bibliography: references.bib
|
||||||
|
---
|
||||||
|
|
||||||
|
## TODO List
|
||||||
|
|
||||||
|
- Pour clustering de collections sur données ~~réelles~~ :
|
||||||
|
→ L'intuition de Pierre semble être confirmée, les dissimilarités semblent arrêter de varier sensiblement pour de grandes valeurs $(Q_1,Q_2)$.
|
||||||
|
- ❓Je n'arrive plus à reproduire le bug pour l'inférence...
|
||||||
|
- 😫 bug encore. S'assurer que ça marche et relancer
|
||||||
|
|
||||||
|
- ⌛ A Roscoff avec Julie et Pierre nous avons constaté que c'était l'extraction des dyades pour le calcul des métriques qui était incorrecte. Maintenant c'est corrigé et ça fonctionne !
|
||||||
|
En fait je donne tous les degrés donc le GNN a juste à retrouver les arêtes non vues. Revérifier que j'entraîne correctement le VGAE car résultats de généralisation trop bons sur les autres réseaux Doré, ce qui est étonnant
|
||||||
|
Pour corriger cet effet :
|
||||||
|
- Donner la matrice identité comme features
|
||||||
|
- Corriger les degrés calculés.
|
||||||
|
|
||||||
|
- ⚠️ Discuter intersection simulations
|
||||||
|
- Clustering sur Doré :
|
||||||
|
- Regarder pour les couples date+nom les études et le nombre de réseaux analysables (Possible demander à Élisa)
|
||||||
|
- ⌛ Chamberlain et al semble intéressant à regarder ! Voir le Rmarkdown
|
||||||
|
- Clusteriser sur la base des noms et voir parmi les réseaux Européens (désagrégés ?)
|
||||||
|
- Si M > 10, alors voir si je retrouve les mêmes résultats que dans les études.
|
||||||
|
|
||||||
|
- Regarder les codes Mangal database pour $\delta$
|
||||||
|
- Voir $\delta$ mais additif
|
||||||
|
|
||||||
|
:::{.callout-note}
|
||||||
|
### $\delta$ additif Bernoulli
|
||||||
|
En Bernoulli pas de forme analytique non plus :
|
||||||
|
Pour $\alpha_{qr}$:
|
||||||
|
$$ \sum_{m=1}^M \sum_{i=1}^{n_1^m} \sum_{j=1}^{n_2^m} \tau_{iq}^{1,m}\tau_{jr}^{2,m}(\frac{X_{ij}^m}{\alpha_{qr}} + \frac{(1-X_{ij}^m)}{\alpha_{qr} + \delta_m -1}) = 0$$
|
||||||
|
$$\Leftrightarrow \sum_m \frac{e^m_{qr}}{\alpha_{qr}} + \frac{1}{\alpha_{qr}+\delta_m-1} (n^m_{qr}-e^m_{qr}) = 0$$
|
||||||
|
|
||||||
|
Et pour $\delta_m$:
|
||||||
|
$$ \sum_{i=1}^{n_1^m} \sum_{j=1}^{n_2^m} \sum_{q=1}^{Q_1} \sum_{r=1}^{Q_2} \tau_{iq}^{1,m}\tau_{jr}^{2,m}(\frac{X_{ij}^m}{\delta_{m}} + \frac{(1-X_{ij}^m)}{\alpha_{qr} + \delta_m -1}) = 0$$
|
||||||
|
:::
|
||||||
|
|
||||||
|
:::{.callout-note}
|
||||||
|
### $\delta$ additif Poisson
|
||||||
|
Forme analytique mais risque de confusion ?
|
||||||
|
$$\widehat{\delta_m} = \frac{\sum_{q,r} e^m_{qr}}{\sum_{q,r} n^m_{qr}},~\widehat{\alpha_{qr}} = \frac{\sum_{m} e^m_{qr}}{\sum_{m} n^m_{qr}} $$
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
|
- Regarder la liste des cours du MathSV et de l'Université Paris-Saclay.
|
||||||
|
|
||||||
|
- ⌛ **Plutôt regarder pour introduire un modèle $\delta$-colBiSBM**.
|
||||||
|
- Ajouter le produit par $\delta$ là où nécessaire
|
||||||
|
- Ajouter les modèles $\delta$, $\delta\pi, \dots$ et les blocs conditionnels
|
||||||
|
- Ajouter les tests unitaires adéquats et les vérifier
|
||||||
|
|
||||||
|
- Regarder *Largest gap* sur réseaux Doré
|
||||||
|
|
||||||
|
- Essayer *clustering* sur `supinfo`
|
||||||
|
|
||||||
|
- Homogénéiser notations dans les supplementaries
|
||||||
|
|
||||||
|
:::{#ref-kmeans-vae}
|
||||||
|
|
||||||
|
- Faire GNN-VAE Doré et sub-Doré avec kmeans et clustering sur l'espace latent
|
||||||
|
J'ai commencé à regarder un peu
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
|
|
||||||
|
### Inférence et microbes
|
||||||
|
|
||||||
|
- easy16s : se renseigner sur
|
||||||
|
- $\alpha$, $\beta$ diversité
|
||||||
|
- Heatmap
|
||||||
|
- Voir avec Mahendra à l'occasion du CSI
|
||||||
|
- Lancer *colBiSBM* sur $OTU\times Sample$ → problème du chargement en mémoire des données à voir
|
||||||
|
- Lancer *colSBM* sur $OTU\times OTU$
|
||||||
|
- TabNet pratiquer les [exercices](https://github.com/cregouby/Tutoriel_torch)
|
||||||
|
- Regarder **SPARTA** Rennes
|
||||||
|
- Lire Papiers compositional data (Aitchison et al. intro)
|
||||||
|
- Lire article multi-niveaux Saint-Clair
|
||||||
|
- Ecrire et étudier les modèles pour différents niveaux taxonomiques.
|
||||||
|
\begin{align*}
|
||||||
|
i \rightarrow &~N^1_i \subseteq N^2_i \subseteq N^3_i & \text{Taxonomie}\\
|
||||||
|
Z^0_i \overset{?}{=} & Z^1_i \overset{?}{=} Z^2_i \overset{?}{=} Z^3_i & \text{Groupes fonctionnels}
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
#### Causalité
|
||||||
|
|
||||||
|
Plus sur le temps long, à regarder
|
||||||
|
|
||||||
|
- GT causalité
|
||||||
|
- Daria Bystrova lire présentation @bystrovaCausalDiscovery (Meek rules, V-structure)
|
||||||
|
|
||||||
|
## Biblio à faire
|
||||||
|
|
||||||
|
- Regarder Transport optimal graphes bipartis.
|
||||||
|
|
||||||
|
|
||||||
|
## Lectures en cours 📚
|
||||||
|
|
||||||
|
### HDR Vincent Brault
|
||||||
|
|
||||||
|
- ⌛ Chap 2 : Creuser l'idée de maximiser l'énergie libre, très intéressant regarder le critère CARI et lire Robert et al 2021. Actuellement p32 du manuscrit
|
||||||
|
- Chap 3
|
||||||
|
|
||||||
|
### OT
|
||||||
|
- ⌛ @mazeletUnsupervisedLearningOptimal Intéressant pour le transport optimal entre graphes de tailles différentes | Regarder si régularisation entropique ne marche pas bien pour le graphe.
|
||||||
|
- ⌛ @nennaLecture2Entropic Pour comprendre le problème d'OT régularisé pour l'entropie.
|
||||||
|
- ⌛ @nennaLecture1Monge
|
||||||
|
|
||||||
|
### Inférence de graphes
|
||||||
|
|
||||||
|
- ⌛ @aitchisonStatisticalAnalysisCompositional1982a, en cours
|
||||||
|
|
||||||
|
- ❗📖 @payneFiniteMixturesMultivariate2023 sur MixMPLN
|
||||||
|
|
||||||
|
### Causalité
|
||||||
|
|
||||||
|
- ❗📖 @bystrovaCausalDiscovery
|
||||||
|
|
||||||
|
### Largest Gaps
|
||||||
|
|
||||||
|
- ❗📖 @braultFastConsistentAlgorithm2023
|
||||||
|
- ❗📖 @channarondClassificationEstimationStochastic2012 le papier qui introduit le *Largest Gaps*
|
||||||
|
|
||||||
|
## A discuter
|
||||||
|
|
||||||
|
### Congés P&S
|
||||||
|
|
||||||
|
### Thèse
|
||||||
|
|
||||||
|
- Faire préz CSI
|
||||||
|
- Faire rapport CSI
|
||||||
|
|
||||||
|
|
||||||
|
### Inférence
|
||||||
|
|
||||||
|
- pbs : variance, bcp de zero, covariables, offset et taxonomie (Reseaux arretes differents niveaux : Genre, OTU ...)
|
||||||
|
|
||||||
|
> Combine networks at different taxonomic levels
|
||||||
|
|
||||||
|
- Inférence + GREMLINS
|
||||||
144
suivi/2025-38/references.bib
Normal file
|
|
@ -0,0 +1,144 @@
|
||||||
|
@article{mazeletUnsupervisedLearningOptimal,
|
||||||
|
title = {Unsupervised {{Learning}} for {{Optimal Transport}} Plan Prediction between Unbalanced Graphs},
|
||||||
|
author = {Mazelet, Sonia and Flamary, Rémi and Thirion, Bertrand},
|
||||||
|
abstract = {Optimal transport between graphs, based on Gromov-Wasserstein and other extensions, is a powerful tool for comparing and aligning graph structures. However, solving the associated non-convex optimization problems is computationally expensive, which limits the scalability of these methods to large graphs. In this work, we present Unbalanced Learning of Optimal Transport (ULOT), a deep learning method that predicts optimal transport plans between two graphs. Our method is trained by minimizing the fused unbalanced Gromov-Wasserstein (FUGW) loss. We propose a novel neural architecture with cross-attention that is conditioned on the FUGW tradeoff hyperparameters. We evaluate ULOT on synthetic stochastic block model (SBM) graphs and on real cortical surface data obtained from fMRI. ULOT predicts transport plans with competitive loss up to two orders of magnitude faster than classical solvers. Furthermore, the predicted plan can be used as a warm start for classical solvers to accelerate their convergence. Finally, the predicted transport plan is fully differentiable with respect to the graph inputs and FUGW hyperparameters, enabling the optimization of functionals of the ULOT plan.},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-11T09:08:09.864Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/HPZEYMM9/Mazelet et al. - Unsupervised Learning for Optimal Transport plan prediction between unbalanced graphs.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{nennaLecture2Entropic,
|
||||||
|
title = {Lecture 2: {{Entropic Optimal Transport}}},
|
||||||
|
author = {Nenna, Luca},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-11T16:06:28.547Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/WGFIISDB/Nenna - Lecture 2 Entropic Optimal Transport.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{nennaLecture1Monge,
|
||||||
|
title = {Lecture 1 {{Monge}} and {{Kantorovich}} Problems: From Primal to Dual},
|
||||||
|
author = {Nenna, Luca},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-13T09:24:13.832Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/7LVQPD6D/Nenna - Lecture 1 Monge and Kantorovich problems from primal to dual.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{Morton2021.11.09.467939,
|
||||||
|
title = {Scalable Estimation of Microbial Co-Occurrence Networks with {{Variational Autoencoders}}},
|
||||||
|
author = {Morton, James T. and Silverman, Justin and Tikhonov, Gleb and Lähdesmäki, Harri and Bonneau, Rich},
|
||||||
|
date = {2021},
|
||||||
|
journaltitle = {bioRxiv : the preprint server for biology},
|
||||||
|
shortjournal = {bioRxiv},
|
||||||
|
eprint = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939.full.pdf},
|
||||||
|
publisher = {Cold Spring Harbor Laboratory},
|
||||||
|
doi = {10.1101/2021.11.09.467939},
|
||||||
|
url = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939},
|
||||||
|
abstract = {Estimating microbe-microbe interactions is critical for understanding the ecological laws governing microbial communities. Rapidly decreasing sequencing costs have promised new opportunities to estimate microbe-microbe interactions across thousands of uncultured, unknown microbes. However, typical microbiome datasets are very high dimensional and accurate estimation of microbial correlations requires tens of thousands of samples, exceeding the computational capabilities of existing methodologies. Furthermore, the vast majority of microbiome studies collect compositional metagenomics data which enforces a negative bias when computing microbe-microbe correlations. The Multinomial Logistic Normal (MLN) distribution has been shown to be effective at inferring microbe-microbe correlations, however scalable Bayesian inference of these distributions has remained elusive. Here, we show that carefully constructed Variational Autoencoders (VAEs) augmented with the Isometric Log-ratio (ILR) transform can estimate low-rank MLN distributions thousands of times faster than existing methods. These VAEs can be trained on tens of thousands of samples, enabling co-occurrence inference across tens of thousands of microbes without regularization. The latent embedding distances computed from these VAEs are competitive with existing beta-diversity methods across a variety of mouse and human microbiome classification and regression tasks, with notable improvements on longitudinal studies. Competing Interest Statement: The authors have declared no competing interest.},
|
||||||
|
elocation-id = {2021.11.09.467939},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-30T14:17:29.518Z}
|
||||||
|
}
|
||||||
|
@article{aitchisonStatisticalAnalysisCompositional1982a,
|
||||||
|
title = {The {{Statistical Analysis}} of {{Compositional Data}}},
|
||||||
|
author = {Aitchison, J.},
|
||||||
|
date = {1982},
|
||||||
|
journaltitle = {Journal of the Royal Statistical Society. Series B (Methodological)},
|
||||||
|
volume = {44},
|
||||||
|
number = {2},
|
||||||
|
eprint = {2345821},
|
||||||
|
eprinttype = {jstor},
|
||||||
|
pages = {139--177},
|
||||||
|
publisher = {[Royal Statistical Society, Oxford University Press]},
|
||||||
|
issn = {0035-9246},
|
||||||
|
url = {https://www.jstor.org/stable/2345821},
|
||||||
|
urldate = {2025-05-07},
|
||||||
|
abstract = {The simplex plays an important role as sample space in many practical situations where compositional data, in the form of proportions of some whole, require interpretation. It is argued that the statistical analysis of such data has proved difficult because of a lack both of concepts of independence and of rich enough parametric classes of distributions in the simplex. A variety of independence hypotheses are introduced and interrelated, and new classes of transformed-normal distributions in the simplex are provided as models within which the independence hypotheses can be tested through standard theory of parametric hypothesis testing. The new concepts and statistical methodology are illustrated by a number of applications.},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-05-07T07:43:38.485Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/S97URH4Y/Aitchison - 1982 - The Statistical Analysis of Compositional Data.pdf}
|
||||||
|
}
|
||||||
|
@online{payneFiniteMixturesMultivariate2023,
|
||||||
|
title = {Finite {{Mixtures}} of {{Multivariate Poisson-Log Normal Factor Analyzers}} for {{Clustering Count Data}}},
|
||||||
|
author = {Payne, Andrea and Silva, Anjali and Rothstein, Steven J. and McNicholas, Paul D. and Subedi, Sanjeena},
|
||||||
|
date = {2023-11-13},
|
||||||
|
eprint = {2311.07762},
|
||||||
|
eprinttype = {arXiv},
|
||||||
|
eprintclass = {stat},
|
||||||
|
doi = {10.48550/arXiv.2311.07762},
|
||||||
|
url = {http://arxiv.org/abs/2311.07762},
|
||||||
|
urldate = {2025-07-02},
|
||||||
|
abstract = {A mixture of multivariate Poisson-log normal factor analyzers is introduced by imposing constraints on the covariance matrix, which resulted in flexible models for clustering purposes. In particular, a class of eight parsimonious mixture models based on the mixtures of factor analyzers model are introduced. Variational Gaussian approximation is used for parameter estimation, and information criteria are used for model selection. The proposed models are explored in the context of clustering discrete data arising from RNA sequencing studies. Using real and simulated data, the models are shown to give favourable clustering performance. The GitHub R package for this work is available at https://github.com/anjalisilva/mixMPLNFA and is released under the open-source MIT license.},
|
||||||
|
pubstate = {prepublished},
|
||||||
|
keywords = {/unread,Statistics - Computation,Statistics - Machine Learning,Statistics - Methodology},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-02T09:31:47.579Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/BXVPEIDD/Payne et al. - 2023 - Finite Mixtures of Multivariate Poisson-Log Normal Factor Analyzers for Clustering Count Data.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/L5DAS5C2/2311.html}
|
||||||
|
}
|
||||||
|
@unpublished{bystrovaCausalDiscovery,
|
||||||
|
title = {Causal Discovery},
|
||||||
|
author = {Bystrova, Daria},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-02T09:34:39.476Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/NQE5DY92/Bystrova - Causal discovery.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@online{braultFastConsistentAlgorithm2023,
|
||||||
|
title = {Fast and {{Consistent Algorithm}} for the {{Latent Block Model}}},
|
||||||
|
author = {Brault, Vincent and Channarond, Antoine},
|
||||||
|
date = {2023-03-09},
|
||||||
|
eprint = {1610.09005},
|
||||||
|
eprinttype = {arXiv},
|
||||||
|
eprintclass = {math},
|
||||||
|
doi = {10.48550/arXiv.1610.09005},
|
||||||
|
url = {http://arxiv.org/abs/1610.09005},
|
||||||
|
urldate = {2025-07-09},
|
||||||
|
abstract = {The latent block model is used to simultaneously rank the rows and columns of a matrix to reveal a block structure. The algorithms used for estimation are often time consuming. However, recent work shows that the log-likelihood ratios are equivalent under the complete and observed (with unknown labels) models and the groups posterior distribution to converge as the size of the data increases to a Dirac mass located at the actual groups configuration. Based on these observations, the algorithm Largest Gaps is proposed in this paper to perform clustering using only the marginals of the matrix, when the number of blocks is very small with respect to the size of the whole matrix in the case of binary data. In addition, a model selection method is incorporated with a proof of its consistency. Thus, this paper shows that studying simplistic configurations (few blocks compared to the size of the matrix or very contrasting blocks) with complex algorithms is useless since the marginals already give very good parameter and classification estimates.},
|
||||||
|
langid = {english},
|
||||||
|
pubstate = {prepublished},
|
||||||
|
keywords = {/unread,Mathematics - Statistics Theory,Statistics - Computation,Statistics - Statistics Theory},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T13:58:53.533Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/5LXC6Y68/Brault et Channarond - 2023 - Fast and Consistent Algorithm for the Latent Block Model.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{braultGeneralisationLalgorithmeLargest,
|
||||||
|
title = {Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique},
|
||||||
|
author = {Brault, Vincent and Channarond, Antoine and Robert, Valérie},
|
||||||
|
abstract = {The latent block model assumes there exists a distribution for each crossing between an object cluster and a variable cluster of a data table ; the cells are supposed to be independent conditionally to the choice of these clusters. To estimate the model parameters, most of algorithms are time consuming. Brault and Channarond (2016) proposed to adapt the Largest Gaps algorithm which consists in using the margins. They thus obtained a procedure which estimates all the model parameters consistently but requires a large number of observations. In this talk, we will extend the procedure to the case of any distribution having a second order moment by using an EM algorithm estimation.},
|
||||||
|
langid = {french},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T12:29:43.098Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/GIUNC4L3/Brault et al. - Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{channarondClassificationEstimationStochastic2012,
|
||||||
|
title = {Classification and Estimation in the {{Stochastic Blockmodel}} Based on the Empirical Degrees},
|
||||||
|
author = {Channarond, Antoine and Daudin, Jean-Jacques and Robin, Stéphane},
|
||||||
|
date = {2012-01-01},
|
||||||
|
journaltitle = {Electronic Journal of Statistics},
|
||||||
|
shortjournal = {Electron. J. Statist.},
|
||||||
|
volume = {6},
|
||||||
|
publisher = {Institute of Mathematical Statistics},
|
||||||
|
issn = {1935-7524},
|
||||||
|
doi = {10.1214/12-ejs753},
|
||||||
|
url = {https://projecteuclid.org/journals/electronic-journal-of-statistics/volume-6/issue-none/Classification-and-estimation-in-the-Stochastic-Blockmodel-based-on-the/10.1214/12-EJS753.full},
|
||||||
|
urldate = {2025-07-09},
|
||||||
|
abstract = {The Stochastic Blockmodel [16] is a mixture model for heterogeneous network data. Unlike the usual statistical framework, new nodes give additional information about the previous ones in this model. Thereby the distribution of the degrees concentrates in points conditionally on the node class. We show under a mild assumption that classification, estimation and model selection can actually be achieved with no more than the empirical degree data. We provide an algorithm able to process very large networks and consistent estimators based on it. In particular, we prove a bound of the probability of misclassification of at least one node, including when the number of classes grows.},
|
||||||
|
issue = {none},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T13:59:33.921Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/8TL8AJ2G/Channarond et al. - 2012 - Classification and estimation in the Stochastic Blockmodel based on the empirical degrees.pdf}
|
||||||
|
}
|
||||||
168
suivi/2025-43/2025-43.qmd
Normal file
|
|
@ -0,0 +1,168 @@
|
||||||
|
---
|
||||||
|
title: "Bilan semaine 43 2025 : 20 octobre - 24 octobre"
|
||||||
|
categories: [colBiSBM, inférence, GNN]
|
||||||
|
date: 2025-10-20
|
||||||
|
date-modified: last-modified
|
||||||
|
bibliography: references.bib
|
||||||
|
---
|
||||||
|
|
||||||
|
## TODO List
|
||||||
|
|
||||||
|
- Finir le papier :
|
||||||
|
- Re-structurer le plan, mon plan, Donnet et Barbillon, échelle méso et comparaison inter réseau et noeuds non partagés.
|
||||||
|
- ✅ Écrire en annexe le BIC-L, faire attention à ajouter l'entropie à la toute fin en mentionnant
|
||||||
|
- ⌛ Fusionner VGAE et information transfer (missing links seulement) donc refaire tourner sur même données qu'en R. A adapter pour Python et pouvoir intégrer dans la figure. (raccourcit).
|
||||||
|
- Faire sep-VGAE (seulement sur le réseau avec missing links) et VGAE avec les 4 réseaux.
|
||||||
|
En train de reproduire les résultats, AUC stable autour de 0.7
|
||||||
|
- Remplacer *Information transfer on simu* par Network partitioning.
|
||||||
|
- ⌛ Écrire le poster avec un titre aguicheur "Are my pollinators your pollinators: ...":
|
||||||
|
Commencé contenu à déterminer avec Pierre et Sophie
|
||||||
|
|
||||||
|
- Maitriser graphtools de Peixoto pour essayer d'utiliser l'arbre taxonomique sur graphe de cooccurrence inféré par SparCC
|
||||||
|
|
||||||
|
- Maitriser SparCC
|
||||||
|
|
||||||
|
- Faire LBM sur niveau taxonomique grossier, initialiser avec le résultat pour un niveau plus fin et ainsi de suite.
|
||||||
|
|
||||||
|
- Clustering unipartite j'ai cassé une fonction de distance à vérifier et réparer
|
||||||
|
|
||||||
|
<!-- - Idée clustering unipartite graphes des métros
|
||||||
|
<div class="embed-container">
|
||||||
|
<iframe src="https://csun.uic.edu/wp-content/uploads/sites/1080/2023/12/pdf_7.pdf" width=100% height="475px" style="position: relative;">
|
||||||
|
</iframe>
|
||||||
|
</div> -->
|
||||||
|
|
||||||
|
- Pour clustering de collections sur données ~~réelles~~ :
|
||||||
|
→ L'intuition de Pierre semble être confirmée, les dissimilarités semblent arrêter de varier sensiblement pour de grandes valeurs $(Q_1,Q_2)$.
|
||||||
|
- ❓Je n'arrive plus à reproduire le bug pour l'inférence...
|
||||||
|
- 😫 bug encore. S'assurer que ça marche et relancer
|
||||||
|
|
||||||
|
- ⌛ A Roscoff avec Julie et Pierre nous avons constaté que c'était l'extraction des dyades pour le calcul des métriques qui était incorrecte. Maintenant c'est corrigé et ça fonctionne !
|
||||||
|
En fait je donne tous les degrés donc le GNN a juste à retrouver les arêtes non vues. Revérifier que j'entraîne correctement le VGAE car résultats de généralisation trop bons sur les autres réseaux Doré, ce qui est étonnant.
|
||||||
|
Pour corriger cet effet :
|
||||||
|
- Donner la matrice identité comme features
|
||||||
|
- Corriger les degrés calculés.
|
||||||
|
|
||||||
|
- ⚠️ Discuter intersection simulations
|
||||||
|
- Clustering sur Doré :
|
||||||
|
- Regarder pour les couples date+nom les études et le nombre de réseaux analysables (Possible demander à Élisa)
|
||||||
|
- ⌛ Chamberlain et al semble intéressant à regarder ! Voir le Rmarkdown
|
||||||
|
- Clusteriser sur la base des noms et voir parmi les réseaux Européens (désagrégés ?)
|
||||||
|
- Si M > 10, alors voir si je retrouve les mêmes résultats que dans les études.
|
||||||
|
|
||||||
|
- Regarder les codes Mangal database pour $\delta$
|
||||||
|
- Voir $\delta$ mais additif
|
||||||
|
|
||||||
|
:::{.callout-note}
|
||||||
|
### $\delta$ additif Bernoulli
|
||||||
|
En Bernoulli pas de forme analytique non plus :
|
||||||
|
Pour $\alpha_{qr}$:
|
||||||
|
$$ \sum_{m=1}^M \sum_{i=1}^{n_1^m} \sum_{j=1}^{n_2^m} \tau_{iq}^{1,m}\tau_{jr}^{2,m}(\frac{X_{ij}^m}{\alpha_{qr}} + \frac{(1-X_{ij}^m)}{\alpha_{qr} + \delta_m -1}) = 0$$
|
||||||
|
$$\Leftrightarrow \sum_m \left[ \frac{e^m_{qr}}{\alpha_{qr}} + \frac{n^m_{qr}-e^m_{qr}}{\alpha_{qr}+\delta_m-1} \right] = 0$$
|
||||||
|
|
||||||
|
Et pour $\delta_m$:
|
||||||
|
$$ \sum_{i=1}^{n_1^m} \sum_{j=1}^{n_2^m} \sum_{q=1}^{Q_1} \sum_{r=1}^{Q_2} \tau_{iq}^{1,m}\tau_{jr}^{2,m}(\frac{X_{ij}^m}{\delta_{m}} + \frac{(1-X_{ij}^m)}{\alpha_{qr} + \delta_m -1}) = 0$$
|
||||||
|
:::
|
||||||
|
|
||||||
|
:::{.callout-note}
|
||||||
|
### $\delta$ additif Poisson
|
||||||
|
Forme analytique mais risque de confusion ?
|
||||||
|
$$\widehat{\delta_m} = \frac{\sum_{q,r} e^m_{qr}}{\sum_{q,r} n^m_{qr}},~\widehat{\alpha_{qr}} = \frac{\sum_{m} e^m_{qr}}{\sum_{m} n^m_{qr}} $$
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
|
- Regarder la liste des cours du MathSV et de l'Université Paris-Saclay.
|
||||||
|
|
||||||
|
- ⌛ **Plutôt regarder pour introduire un modèle $\delta$-colBiSBM**.
|
||||||
|
- Ajouter le produit par $\delta$ là où nécessaire
|
||||||
|
- Ajouter les modèles $\delta$, $\delta\pi, \dots$ et les blocs conditionnels
|
||||||
|
- Ajouter les tests unitaires adéquats et les vérifier
|
||||||
|
|
||||||
|
- Regarder *Largest gap* sur réseaux Doré
|
||||||
|
|
||||||
|
- Essayer *clustering* sur `supinfo`
|
||||||
|
|
||||||
|
- Homogénéiser notations dans les supplementaries
|
||||||
|
|
||||||
|
:::{#ref-kmeans-vae}
|
||||||
|
|
||||||
|
- Faire GNN-VAE Doré et sub-Doré avec kmeans et clustering sur l'espace latent
|
||||||
|
J'ai commencé à regarder un peu
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
|
|
||||||
|
### Inférence et microbes
|
||||||
|
|
||||||
|
- easy16s : se renseigner sur
|
||||||
|
- $\alpha$, $\beta$ diversité
|
||||||
|
- Heatmap
|
||||||
|
- Voir avec Mahendra à l'occasion du CSI
|
||||||
|
- Lancer *colBiSBM* sur $OTU\times Sample$ → problème du chargement en mémoire des données à voir
|
||||||
|
- Lancer *colSBM* sur $OTU\times OTU$
|
||||||
|
- TabNet pratiquer les [exercices](https://github.com/cregouby/Tutoriel_torch)
|
||||||
|
- Regarder **SPARTA** Rennes
|
||||||
|
- Lire Papiers compositional data (Aitchison et al. intro)
|
||||||
|
- Lire article multi-niveaux Saint-Clair
|
||||||
|
- Ecrire et étudier les modèles pour différents niveaux taxonomiques.
|
||||||
|
\begin{align*}
|
||||||
|
i \rightarrow &~N^1_i \subseteq N^2_i \subseteq N^3_i & \text{Taxonomie}\\
|
||||||
|
Z^0_i \overset{?}{=} & Z^1_i \overset{?}{=} Z^2_i \overset{?}{=} Z^3_i & \text{Groupes fonctionnels}
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
#### Causalité
|
||||||
|
|
||||||
|
Plus sur le temps long, à regarder
|
||||||
|
|
||||||
|
- GT causalité
|
||||||
|
- Daria Bystrova lire présentation @bystrovaCausalDiscovery (Meek rules, V-structure)
|
||||||
|
|
||||||
|
## Biblio à faire
|
||||||
|
|
||||||
|
- Regarder Transport optimal graphes bipartis.
|
||||||
|
|
||||||
|
|
||||||
|
## Lectures en cours 📚
|
||||||
|
|
||||||
|
### HDR Vincent Brault
|
||||||
|
|
||||||
|
- ⌛ Chap 2 : Creuser l'idée de maximiser l'énergie libre, très intéressant regarder le critère CARI et lire Robert et al 2021. Actuellement p32 du manuscrit
|
||||||
|
- Chap 3
|
||||||
|
|
||||||
|
### OT
|
||||||
|
- ⌛ @mazeletUnsupervisedLearningOptimal Intéressant pour le transport optimal entre graphes de tailles différentes | Regarder si régularisation entropique ne marche pas bien pour le graphe.
|
||||||
|
- ⌛ @nennaLecture2Entropic Pour comprendre le problème d'OT régularisé par l'entropie.
|
||||||
|
- ⌛ @nennaLecture1Monge
|
||||||
|
|
||||||
|
### Inférence de graphes
|
||||||
|
|
||||||
|
- ⌛ @aitchisonStatisticalAnalysisCompositional1982a, en cours
|
||||||
|
|
||||||
|
- ❗📖 @payneFiniteMixturesMultivariate2023 sur MixMPLN
|
||||||
|
|
||||||
|
### Causalité
|
||||||
|
|
||||||
|
- ❗📖 @bystrovaCausalDiscovery
|
||||||
|
|
||||||
|
### Largest Gaps
|
||||||
|
|
||||||
|
- ❗📖 @braultFastConsistentAlgorithm2023
|
||||||
|
- ❗📖 @channarondClassificationEstimationStochastic2012 le papier qui introduit le *Largest Gaps*
|
||||||
|
|
||||||
|
## A discuter
|
||||||
|
|
||||||
|
### Congés P&S
|
||||||
|
|
||||||
|
### Thèse
|
||||||
|
|
||||||
|
- Faire préz CSI
|
||||||
|
- Faire rapport CSI
|
||||||
|
|
||||||
|
|
||||||
|
### Inférence
|
||||||
|
|
||||||
|
- pbs : variance, bcp de zero, covariables, offset et taxonomie (Reseaux arretes differents niveaux : Genre, OTU ...)
|
||||||
|
|
||||||
|
> Combine networks at different taxonomic levels
|
||||||
|
|
||||||
|
- Inférence + GREMLINS
|
||||||
144
suivi/2025-43/references.bib
Normal file
|
|
@ -0,0 +1,144 @@
|
||||||
|
@article{mazeletUnsupervisedLearningOptimal,
|
||||||
|
title = {Unsupervised {{Learning}} for {{Optimal Transport}} Plan Prediction between Unbalanced Graphs},
|
||||||
|
author = {Mazelet, Sonia and Flamary, Rémi and Thirion, Bertrand},
|
||||||
|
abstract = {Optimal transport between graphs, based on Gromov-Wasserstein and other extensions, is a powerful tool for comparing and aligning graph structures. However, solving the associated non-convex optimization problems is computationally expensive, which limits the scalability of these methods to large graphs. In this work, we present Unbalanced Learning of Optimal Transport (ULOT), a deep learning method that predicts optimal transport plans between two graphs. Our method is trained by minimizing the fused unbalanced Gromov-Wasserstein (FUGW) loss. We propose a novel neural architecture with cross-attention that is conditioned on the FUGW tradeoff hyperparameters. We evaluate ULOT on synthetic stochastic block model (SBM) graphs and on real cortical surface data obtained from fMRI. ULOT predicts transport plans with competitive loss up to two orders of magnitude faster than classical solvers. Furthermore, the predicted plan can be used as a warm start for classical solvers to accelerate their convergence. Finally, the predicted transport plan is fully differentiable with respect to the graph inputs and FUGW hyperparameters, enabling the optimization of functionals of the ULOT plan.},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-11T09:08:09.864Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/HPZEYMM9/Mazelet et al. - Unsupervised Learning for Optimal Transport plan prediction between unbalanced graphs.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{nennaLecture2Entropic,
|
||||||
|
title = {Lecture 2: {{Entropic Optimal Transport}}},
|
||||||
|
author = {Nenna, Luca},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-11T16:06:28.547Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/WGFIISDB/Nenna - Lecture 2 Entropic Optimal Transport.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{nennaLecture1Monge,
|
||||||
|
title = {Lecture 1 {{Monge}} and {{Kantorovich}} Problems: From Primal to Dual},
|
||||||
|
author = {Nenna, Luca},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-13T09:24:13.832Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/7LVQPD6D/Nenna - Lecture 1 Monge and Kantorovich problems from primal to dual.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{Morton2021.11.09.467939,
|
||||||
|
title = {Scalable Estimation of Microbial Co-Occurrence Networks with {{Variational Autoencoders}}},
|
||||||
|
author = {Morton, James T. and Silverman, Justin and Tikhonov, Gleb and Lähdesmäki, Harri and Bonneau, Rich},
|
||||||
|
date = {2021},
|
||||||
|
journaltitle = {bioRxiv : the preprint server for biology},
|
||||||
|
shortjournal = {bioRxiv},
|
||||||
|
eprint = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939.full.pdf},
|
||||||
|
publisher = {Cold Spring Harbor Laboratory},
|
||||||
|
doi = {10.1101/2021.11.09.467939},
|
||||||
|
url = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939},
|
||||||
|
abstract = {Estimating microbe-microbe interactions is critical for understanding the ecological laws governing microbial communities. Rapidly decreasing sequencing costs have promised new opportunities to estimate microbe-microbe interactions across thousands of uncultured, unknown microbes. However, typical microbiome datasets are very high dimensional and accurate estimation of microbial correlations requires tens of thousands of samples, exceeding the computational capabilities of existing methodologies. Furthermore, the vast majority of microbiome studies collect compositional metagenomics data which enforces a negative bias when computing microbe-microbe correlations. The Multinomial Logistic Normal (MLN) distribution has been shown to be effective at inferring microbe-microbe correlations, however scalable Bayesian inference of these distributions has remained elusive. Here, we show that carefully constructed Variational Autoencoders (VAEs) augmented with the Isometric Log-ratio (ILR) transform can estimate low-rank MLN distributions thousands of times faster than existing methods. These VAEs can be trained on tens of thousands of samples, enabling co-occurrence inference across tens of thousands of microbes without regularization. The latent embedding distances computed from these VAEs are competitive with existing beta-diversity methods across a variety of mouse and human microbiome classification and regression tasks, with notable improvements on longitudinal studies. Competing Interest Statement: The authors have declared no competing interest.},
|
||||||
|
elocation-id = {2021.11.09.467939},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-30T14:17:29.518Z}
|
||||||
|
}
|
||||||
|
@article{aitchisonStatisticalAnalysisCompositional1982a,
|
||||||
|
title = {The {{Statistical Analysis}} of {{Compositional Data}}},
|
||||||
|
author = {Aitchison, J.},
|
||||||
|
date = {1982},
|
||||||
|
journaltitle = {Journal of the Royal Statistical Society. Series B (Methodological)},
|
||||||
|
volume = {44},
|
||||||
|
number = {2},
|
||||||
|
eprint = {2345821},
|
||||||
|
eprinttype = {jstor},
|
||||||
|
pages = {139--177},
|
||||||
|
publisher = {[Royal Statistical Society, Oxford University Press]},
|
||||||
|
issn = {0035-9246},
|
||||||
|
url = {https://www.jstor.org/stable/2345821},
|
||||||
|
urldate = {2025-05-07},
|
||||||
|
abstract = {The simplex plays an important role as sample space in many practical situations where compositional data, in the form of proportions of some whole, require interpretation. It is argued that the statistical analysis of such data has proved difficult because of a lack both of concepts of independence and of rich enough parametric classes of distributions in the simplex. A variety of independence hypotheses are introduced and interrelated, and new classes of transformed-normal distributions in the simplex are provided as models within which the independence hypotheses can be tested through standard theory of parametric hypothesis testing. The new concepts and statistical methodology are illustrated by a number of applications.},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-05-07T07:43:38.485Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/S97URH4Y/Aitchison - 1982 - The Statistical Analysis of Compositional Data.pdf}
|
||||||
|
}
|
||||||
|
@online{payneFiniteMixturesMultivariate2023,
|
||||||
|
title = {Finite {{Mixtures}} of {{Multivariate Poisson-Log Normal Factor Analyzers}} for {{Clustering Count Data}}},
|
||||||
|
author = {Payne, Andrea and Silva, Anjali and Rothstein, Steven J. and McNicholas, Paul D. and Subedi, Sanjeena},
|
||||||
|
date = {2023-11-13},
|
||||||
|
eprint = {2311.07762},
|
||||||
|
eprinttype = {arXiv},
|
||||||
|
eprintclass = {stat},
|
||||||
|
doi = {10.48550/arXiv.2311.07762},
|
||||||
|
url = {http://arxiv.org/abs/2311.07762},
|
||||||
|
urldate = {2025-07-02},
|
||||||
|
abstract = {A mixture of multivariate Poisson-log normal factor analyzers is introduced by imposing constraints on the covariance matrix, which resulted in flexible models for clustering purposes. In particular, a class of eight parsimonious mixture models based on the mixtures of factor analyzers model are introduced. Variational Gaussian approximation is used for parameter estimation, and information criteria are used for model selection. The proposed models are explored in the context of clustering discrete data arising from RNA sequencing studies. Using real and simulated data, the models are shown to give favourable clustering performance. The GitHub R package for this work is available at https://github.com/anjalisilva/mixMPLNFA and is released under the open-source MIT license.},
|
||||||
|
pubstate = {prepublished},
|
||||||
|
keywords = {/unread,Statistics - Computation,Statistics - Machine Learning,Statistics - Methodology},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-02T09:31:47.579Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/BXVPEIDD/Payne et al. - 2023 - Finite Mixtures of Multivariate Poisson-Log Normal Factor Analyzers for Clustering Count Data.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/L5DAS5C2/2311.html}
|
||||||
|
}
|
||||||
|
@unpublished{bystrovaCausalDiscovery,
|
||||||
|
title = {Causal Discovery},
|
||||||
|
author = {Bystrova, Daria},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-02T09:34:39.476Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/NQE5DY92/Bystrova - Causal discovery.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@online{braultFastConsistentAlgorithm2023,
|
||||||
|
title = {Fast and {{Consistent Algorithm}} for the {{Latent Block Model}}},
|
||||||
|
author = {Brault, Vincent and Channarond, Antoine},
|
||||||
|
date = {2023-03-09},
|
||||||
|
eprint = {1610.09005},
|
||||||
|
eprinttype = {arXiv},
|
||||||
|
eprintclass = {math},
|
||||||
|
doi = {10.48550/arXiv.1610.09005},
|
||||||
|
url = {http://arxiv.org/abs/1610.09005},
|
||||||
|
urldate = {2025-07-09},
|
||||||
|
abstract = {The latent block model is used to simultaneously rank the rows and columns of a matrix to reveal a block structure. The algorithms used for estimation are often time consuming. However, recent work shows that the log-likelihood ratios are equivalent under the complete and observed (with unknown labels) models and the groups posterior distribution to converge as the size of the data increases to a Dirac mass located at the actual groups configuration. Based on these observations, the algorithm Largest Gaps is proposed in this paper to perform clustering using only the marginals of the matrix, when the number of blocks is very small with respect to the size of the whole matrix in the case of binary data. In addition, a model selection method is incorporated with a proof of its consistency. Thus, this paper shows that studying simplistic configurations (few blocks compared to the size of the matrix or very contrasting blocks) with complex algorithms is useless since the marginals already give very good parameter and classification estimates.},
|
||||||
|
langid = {english},
|
||||||
|
pubstate = {prepublished},
|
||||||
|
keywords = {/unread,Mathematics - Statistics Theory,Statistics - Computation,Statistics - Statistics Theory},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T13:58:53.533Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/5LXC6Y68/Brault et Channarond - 2023 - Fast and Consistent Algorithm for the Latent Block Model.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{braultGeneralisationLalgorithmeLargest,
|
||||||
|
title = {Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique},
|
||||||
|
author = {Brault, Vincent and Channarond, Antoine and Robert, Valérie},
|
||||||
|
abstract = {The latent block model assumes there exists a distribution for each crossing between an object cluster and a variable cluster of a data table ; the cells are supposed to be independent conditionally to the choice of these clusters. To estimate the model parameters, most of algorithms are time consuming. Brault and Channarond (2016) proposed to adapt the Largest Gaps algorithm which consists in using the margins. They thus obtained a procedure which estimates all the model parameters consistently but requires a large number of observations. In this talk, we will extend the procedure to the case of any distribution having a second order moment by using an EM algorithm estimation.},
|
||||||
|
langid = {french},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T12:29:43.098Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/GIUNC4L3/Brault et al. - Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{channarondClassificationEstimationStochastic2012,
|
||||||
|
title = {Classification and Estimation in the {{Stochastic Blockmodel}} Based on the Empirical Degrees},
|
||||||
|
author = {Channarond, Antoine and Daudin, Jean-Jacques and Robin, Stéphane},
|
||||||
|
date = {2012-01-01},
|
||||||
|
journaltitle = {Electronic Journal of Statistics},
|
||||||
|
shortjournal = {Electron. J. Statist.},
|
||||||
|
volume = {6},
|
||||||
|
publisher = {Institute of Mathematical Statistics},
|
||||||
|
issn = {1935-7524},
|
||||||
|
doi = {10.1214/12-ejs753},
|
||||||
|
url = {https://projecteuclid.org/journals/electronic-journal-of-statistics/volume-6/issue-none/Classification-and-estimation-in-the-Stochastic-Blockmodel-based-on-the/10.1214/12-EJS753.full},
|
||||||
|
urldate = {2025-07-09},
|
||||||
|
abstract = {The Stochastic Blockmodel [16] is a mixture model for heterogeneous network data. Unlike the usual statistical framework, new nodes give additional information about the previous ones in this model. Thereby the distribution of the degrees concentrates in points conditionally on the node class. We show under a mild assumption that classification, estimation and model selection can actually be achieved with no more than the empirical degree data. We provide an algorithm able to process very large networks and consistent estimators based on it. In particular, we prove a bound of the probability of misclassification of at least one node, including when the number of classes grows.},
|
||||||
|
issue = {none},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T13:59:33.921Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/8TL8AJ2G/Channarond et al. - 2012 - Classification and estimation in the Stochastic Blockmodel based on the empirical degrees.pdf}
|
||||||
|
}
|
||||||
125
suivi/2025-44/2025-44.qmd
Normal file
|
|
@ -0,0 +1,125 @@
|
||||||
|
---
|
||||||
|
title: "Bilan semaine 44 2025 : 27 octobre - 31 octobre"
|
||||||
|
categories: [colBiSBM, inférence, GNN]
|
||||||
|
date: 2025 10 27
|
||||||
|
date-modified: last-modified
|
||||||
|
bibliography: references.bib
|
||||||
|
---
|
||||||
|
|
||||||
|
## TODO List
|
||||||
|
|
||||||
|
- Finir le papier :
|
||||||
|
- Re-structurer le plan, mon plan, Donnet et Barbillon, échelle méso et comparaison inter réseau et noeuds non partagés.
|
||||||
|
- Partie Baldock: Ajouter l'ordre des modèles préférés
|
||||||
|
- Envoyer Info transfer en annexe et remplacer par Network partitioning
|
||||||
|
|
||||||
|
- ✅ Fusionner VGAE et information transfer (missing links seulement) donc refaire tourner sur même données qu'en R. A adapter pour Python et pouvoir intégrer dans la figure. (raccourcit).
|
||||||
|
- ✅ Faire sep-VGAE (seulement sur le réseau avec missing links) et VGAE avec les 4 réseaux.
|
||||||
|
En train de reproduire les résultats, AUC stable autour de 0.7
|
||||||
|
- Remplacer *Information transfer on simu* par Network partitioning.
|
||||||
|
- ✅ Écrire le poster avec un titre aguicheur "Are my pollinators your pollinators: ...":
|
||||||
|
|
||||||
|
- Maitriser graphtools de Peixoto pour essayer d'utiliser l'arbre taxonomique sur graphe de cooccurrence inféré par SparCC
|
||||||
|
|
||||||
|
- Maitriser SparCC
|
||||||
|
|
||||||
|
- Faire LBM sur niveau taxonomique grossier, initialiser avec le résultat pour un niveau plus fin et ainsi de suite.
|
||||||
|
|
||||||
|
- Clustering unipartite j'ai cassé une fonction de distance à vérifier et réparer
|
||||||
|
|
||||||
|
<!-- - Idée clustering unipartite graphes des métros
|
||||||
|
<div class="embed-container">
|
||||||
|
<iframe src="https://csun.uic.edu/wp-content/uploads/sites/1080/2023/12/pdf_7.pdf" width=100% height="475px" style="position: relative;">
|
||||||
|
</iframe>
|
||||||
|
</div> -->
|
||||||
|
|
||||||
|
- Pour clustering de collections sur données ~~réelles~~ :
|
||||||
|
→ L'intuition de Pierre semble être confirmée, les dissimilarités semblent arrêter de varier sensiblement pour de grandes valeurs $(Q_1,Q_2)$.
|
||||||
|
|
||||||
|
- 👶 (délégué à stagiaire) Clustering sur Doré :
|
||||||
|
- Regarder pour les couples date+nom les études et le nombre de réseaux analysables (Possible demander à Élisa)
|
||||||
|
- ⌛ Chamberlain et al semble intéressant à regarder ! Voir le Rmarkdown
|
||||||
|
- Clusteriser sur la base des noms et voir parmi les réseaux Européens (désagrégés ?)
|
||||||
|
- Si M > 10, alors voir si je retrouve les mêmes résultats que dans les études.
|
||||||
|
|
||||||
|
- Regarder *Largest gap* sur réseaux Doré
|
||||||
|
|
||||||
|
- Essayer *clustering* sur `supinfo`
|
||||||
|
|
||||||
|
- ✅ Homogénéiser notations dans les supplementaries
|
||||||
|
|
||||||
|
|
||||||
|
### Inférence et microbes
|
||||||
|
|
||||||
|
#### Bibliographie: à lire, à faire
|
||||||
|
|
||||||
|
- Lire article multi-niveaux Saint-Clair
|
||||||
|
- 🆕⌛ Papier Julie Negative Binomiale
|
||||||
|
- 🆕 🔎 Trouver des papiers:
|
||||||
|
- LBM Negative Binomial
|
||||||
|
- Network inference through sample comparison
|
||||||
|
|
||||||
|
#### Réflexion
|
||||||
|
|
||||||
|
- easy16s : se renseigner sur
|
||||||
|
- $\alpha$, $\beta$ diversité
|
||||||
|
- Heatmap
|
||||||
|
- Regarder **SPARTA** Rennes
|
||||||
|
- Ecrire et étudier les modèles pour différents niveaux taxonomiques.
|
||||||
|
- 🆕 Regarder NetComi
|
||||||
|
- 🆕 Regarder OneNet car agrégation plus robuste
|
||||||
|
- 🆕 Réfléchir sens d'agréger les données ou de les diviser
|
||||||
|
|
||||||
|
#### Écrire et faire tourner
|
||||||
|
|
||||||
|
- Lancer *colBiSBM* sur $OTU\times Sample$ → problème du chargement en mémoire des données à voir
|
||||||
|
- Lancer *colSBM* sur $OTU\times OTU$
|
||||||
|
- TabNet pratiquer les [exercices](https://github.com/cregouby/Tutoriel_torch)
|
||||||
|
- 🆕 SparCC à différents niveaux
|
||||||
|
- 🆕 SBM à différents niveaux
|
||||||
|
- 🆕⌛ Tree-PLN à différents niveaux
|
||||||
|
|
||||||
|
|
||||||
|
#### Causalité
|
||||||
|
|
||||||
|
Plus sur le temps long, à regarder
|
||||||
|
|
||||||
|
- GT causalité
|
||||||
|
- Daria Bystrova lire présentation @bystrovaCausalDiscovery (Meek rules, V-structure)
|
||||||
|
|
||||||
|
## A discuter
|
||||||
|
|
||||||
|
- 🆕 Voir pour des Réseaux / GDR où aller
|
||||||
|
- 🆕 Chercher des cours à suivre
|
||||||
|
|
||||||
|
## Biblio à faire
|
||||||
|
|
||||||
|
- Regarder Transport optimal graphes bipartite.
|
||||||
|
|
||||||
|
|
||||||
|
## Lectures en cours 📚
|
||||||
|
|
||||||
|
### HDR Vincent Brault
|
||||||
|
|
||||||
|
- ⌛ Chap 2 : Creuser l'idée de maximiser l'énergie libre, très intéressant regarder le critère CARI et lire Robert et al 2021. Actuellement p32 du manuscrit
|
||||||
|
- Chap 3
|
||||||
|
|
||||||
|
### OT
|
||||||
|
- ⌛ @mazeletUnsupervisedLearningOptimal Intéressant pour le transport optimal entre graphes de tailles différentes | Regarder si régularisation entropique ne marche pas bien pour le graphe.
|
||||||
|
- ⌛ @nennaLecture2Entropic Pour comprendre le problème d'OT régularisé pour l'entropie.
|
||||||
|
- ⌛ @nennaLecture1Monge
|
||||||
|
|
||||||
|
### Inférence de graphes
|
||||||
|
|
||||||
|
- ⌛ @aitchisonStatisticalAnalysisCompositional1982a, en cours
|
||||||
|
|
||||||
|
- ❗📖 @payneFiniteMixturesMultivariate2023 sur MixMPLN
|
||||||
|
|
||||||
|
### Causalité
|
||||||
|
|
||||||
|
- ❗📖 @bystrovaCausalDiscovery
|
||||||
|
|
||||||
|
### Largest Gaps
|
||||||
|
|
||||||
|
- ❗📖 @braultFastConsistentAlgorithm2023
|
||||||
|
- ❗📖 @channarondClassificationEstimationStochastic2012 le papier qui introduit le *Largest Gaps*
|
||||||
144
suivi/2025-44/references.bib
Normal file
|
|
@ -0,0 +1,144 @@
|
||||||
|
@article{mazeletUnsupervisedLearningOptimal,
|
||||||
|
title = {Unsupervised {{Learning}} for {{Optimal Transport}} Plan Prediction between Unbalanced Graphs},
|
||||||
|
author = {Mazelet, Sonia and Flamary, Rémi and Thirion, Bertrand},
|
||||||
|
abstract = {Optimal transport between graphs, based on Gromov-Wasserstein and other extensions, is a powerful tool for comparing and aligning graph structures. However, solving the associated non-convex optimization problems is computationally expensive, which limits the scalability of these methods to large graphs. In this work, we present Unbalanced Learning of Optimal Transport (ULOT), a deep learning method that predicts optimal transport plans between two graphs. Our method is trained by minimizing the fused unbalanced Gromov-Wasserstein (FUGW) loss. We propose a novel neural architecture with cross-attention that is conditioned on the FUGW tradeoff hyperparameters. We evaluate ULOT on synthetic stochastic block model (SBM) graphs and on real cortical surface data obtained from fMRI. ULOT predicts transport plans with competitive loss up to two orders of magnitude faster than classical solvers. Furthermore, the predicted plan can be used as a warm start for classical solvers to accelerate their convergence. Finally, the predicted transport plan is fully differentiable with respect to the graph inputs and FUGW hyperparameters, enabling the optimization of functionals of the ULOT plan.},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-11T09:08:09.864Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/HPZEYMM9/Mazelet et al. - Unsupervised Learning for Optimal Transport plan prediction between unbalanced graphs.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{nennaLecture2Entropic,
|
||||||
|
title = {Lecture 2: {{Entropic Optimal Transport}}},
|
||||||
|
author = {Nenna, Luca},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-11T16:06:28.547Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/WGFIISDB/Nenna - Lecture 2 Entropic Optimal Transport.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{nennaLecture1Monge,
|
||||||
|
title = {Lecture 1 {{Monge}} and {{Kantorovich}} Problems: From Primal to Dual},
|
||||||
|
author = {Nenna, Luca},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-13T09:24:13.832Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/7LVQPD6D/Nenna - Lecture 1 Monge and Kantorovich problems from primal to dual.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{Morton2021.11.09.467939,
|
||||||
|
title = {Scalable Estimation of Microbial Co-Occurrence Networks with {{Variational Autoencoders}}},
|
||||||
|
author = {Morton, James T. and Silverman, Justin and Tikhonov, Gleb and Lähdesmäki, Harri and Bonneau, Rich},
|
||||||
|
date = {2021},
|
||||||
|
journaltitle = {bioRxiv : the preprint server for biology},
|
||||||
|
shortjournal = {bioRxiv},
|
||||||
|
eprint = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939.full.pdf},
|
||||||
|
publisher = {Cold Spring Harbor Laboratory},
|
||||||
|
doi = {10.1101/2021.11.09.467939},
|
||||||
|
url = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939},
|
||||||
|
abstract = {Estimating microbe-microbe interactions is critical for understanding the ecological laws governing microbial communities. Rapidly decreasing sequencing costs have promised new opportunities to estimate microbe-microbe interactions across thousands of uncultured, unknown microbes. However, typical microbiome datasets are very high dimensional and accurate estimation of microbial correlations requires tens of thousands of samples, exceeding the computational capabilities of existing methodologies. Furthermore, the vast majority of microbiome studies collect compositional metagenomics data which enforces a negative bias when computing microbe-microbe correlations. The Multinomial Logistic Normal (MLN) distribution has been shown to be effective at inferring microbe-microbe correlations, however scalable Bayesian inference of these distributions has remained elusive. Here, we show that carefully constructed Variational Autoencoders (VAEs) augmented with the Isometric Log-ratio (ILR) transform can estimate low-rank MLN distributions thousands of times faster than existing methods. These VAEs can be trained on tens of thousands of samples, enabling co-occurrence inference across tens of thousands of microbes without regularization. The latent embedding distances computed from these VAEs are competitive with existing beta-diversity methods across a variety of mouse and human microbiome classification and regression tasks, with notable improvements on longitudinal studies. Competing Interest Statement: The authors have declared no competing interest.},
|
||||||
|
elocation-id = {2021.11.09.467939},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-30T14:17:29.518Z}
|
||||||
|
}
|
||||||
|
@article{aitchisonStatisticalAnalysisCompositional1982a,
|
||||||
|
title = {The {{Statistical Analysis}} of {{Compositional Data}}},
|
||||||
|
author = {Aitchison, J.},
|
||||||
|
date = {1982},
|
||||||
|
journaltitle = {Journal of the Royal Statistical Society. Series B (Methodological)},
|
||||||
|
volume = {44},
|
||||||
|
number = {2},
|
||||||
|
eprint = {2345821},
|
||||||
|
eprinttype = {jstor},
|
||||||
|
pages = {139--177},
|
||||||
|
publisher = {[Royal Statistical Society, Oxford University Press]},
|
||||||
|
issn = {0035-9246},
|
||||||
|
url = {https://www.jstor.org/stable/2345821},
|
||||||
|
urldate = {2025-05-07},
|
||||||
|
abstract = {The simplex plays an important role as sample space in many practical situations where compositional data, in the form of proportions of some whole, require interpretation. It is argued that the statistical analysis of such data has proved difficult because of a lack both of concepts of independence and of rich enough parametric classes of distributions in the simplex. A variety of independence hypotheses are introduced and interrelated, and new classes of transformed-normal distributions in the simplex are provided as models within which the independence hypotheses can be tested through standard theory of parametric hypothesis testing. The new concepts and statistical methodology are illustrated by a number of applications.},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-05-07T07:43:38.485Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/S97URH4Y/Aitchison - 1982 - The Statistical Analysis of Compositional Data.pdf}
|
||||||
|
}
|
||||||
|
@online{payneFiniteMixturesMultivariate2023,
|
||||||
|
title = {Finite {{Mixtures}} of {{Multivariate Poisson-Log Normal Factor Analyzers}} for {{Clustering Count Data}}},
|
||||||
|
author = {Payne, Andrea and Silva, Anjali and Rothstein, Steven J. and McNicholas, Paul D. and Subedi, Sanjeena},
|
||||||
|
date = {2023-11-13},
|
||||||
|
eprint = {2311.07762},
|
||||||
|
eprinttype = {arXiv},
|
||||||
|
eprintclass = {stat},
|
||||||
|
doi = {10.48550/arXiv.2311.07762},
|
||||||
|
url = {http://arxiv.org/abs/2311.07762},
|
||||||
|
urldate = {2025-07-02},
|
||||||
|
abstract = {A mixture of multivariate Poisson-log normal factor analyzers is introduced by imposing constraints on the covariance matrix, which resulted in flexible models for clustering purposes. In particular, a class of eight parsimonious mixture models based on the mixtures of factor analyzers model are introduced. Variational Gaussian approximation is used for parameter estimation, and information criteria are used for model selection. The proposed models are explored in the context of clustering discrete data arising from RNA sequencing studies. Using real and simulated data, the models are shown to give favourable clustering performance. The GitHub R package for this work is available at https://github.com/anjalisilva/mixMPLNFA and is released under the open-source MIT license.},
|
||||||
|
pubstate = {prepublished},
|
||||||
|
keywords = {/unread,Statistics - Computation,Statistics - Machine Learning,Statistics - Methodology},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-02T09:31:47.579Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/BXVPEIDD/Payne et al. - 2023 - Finite Mixtures of Multivariate Poisson-Log Normal Factor Analyzers for Clustering Count Data.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/L5DAS5C2/2311.html}
|
||||||
|
}
|
||||||
|
@unpublished{bystrovaCausalDiscovery,
|
||||||
|
title = {Causal Discovery},
|
||||||
|
author = {Bystrova, Daria},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-02T09:34:39.476Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/NQE5DY92/Bystrova - Causal discovery.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@online{braultFastConsistentAlgorithm2023,
|
||||||
|
title = {Fast and {{Consistent Algorithm}} for the {{Latent Block Model}}},
|
||||||
|
author = {Brault, Vincent and Channarond, Antoine},
|
||||||
|
date = {2023-03-09},
|
||||||
|
eprint = {1610.09005},
|
||||||
|
eprinttype = {arXiv},
|
||||||
|
eprintclass = {math},
|
||||||
|
doi = {10.48550/arXiv.1610.09005},
|
||||||
|
url = {http://arxiv.org/abs/1610.09005},
|
||||||
|
urldate = {2025-07-09},
|
||||||
|
abstract = {The latent block model is used to simultaneously rank the rows and columns of a matrix to reveal a block structure. The algorithms used for estimation are often time consuming. However, recent work shows that the log-likelihood ratios are equivalent under the complete and observed (with unknown labels) models and the groups posterior distribution to converge as the size of the data increases to a Dirac mass located at the actual groups configuration. Based on these observations, the algorithm Largest Gaps is proposed in this paper to perform clustering using only the marginals of the matrix, when the number of blocks is very small with respect to the size of the whole matrix in the case of binary data. In addition, a model selection method is incorporated with a proof of its consistency. Thus, this paper shows that studying simplistic configurations (few blocks compared to the size of the matrix or very contrasting blocks) with complex algorithms is useless since the marginals already give very good parameter and classification estimates.},
|
||||||
|
langid = {english},
|
||||||
|
pubstate = {prepublished},
|
||||||
|
keywords = {/unread,Mathematics - Statistics Theory,Statistics - Computation,Statistics - Statistics Theory},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T13:58:53.533Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/5LXC6Y68/Brault et Channarond - 2023 - Fast and Consistent Algorithm for the Latent Block Model.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{braultGeneralisationLalgorithmeLargest,
|
||||||
|
title = {Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique},
|
||||||
|
author = {Brault, Vincent and Channarond, Antoine and Robert, Valérie},
|
||||||
|
abstract = {The latent block model assumes there exists a distribution for each crossing between an object cluster and a variable cluster of a data table ; the cells are supposed to be independent conditionally to the choice of these clusters. To estimate the model parameters, most of algorithms are time consuming. Brault and Channarond (2016) proposed to adapt the Largest Gaps algorithm which consists in using the margins. They thus obtained a procedure which estimates all the model parameters consistently but requires a large number of observations. In this talk, we will extend the procedure to the case of any distribution having a second order moment by using an EM algorithm estimation.},
|
||||||
|
langid = {french},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T12:29:43.098Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/GIUNC4L3/Brault et al. - Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{channarondClassificationEstimationStochastic2012,
|
||||||
|
title = {Classification and Estimation in the {{Stochastic Blockmodel}} Based on the Empirical Degrees},
|
||||||
|
author = {Channarond, Antoine and Daudin, Jean-Jacques and Robin, Stéphane},
|
||||||
|
date = {2012-01-01},
|
||||||
|
journaltitle = {Electronic Journal of Statistics},
|
||||||
|
shortjournal = {Electron. J. Statist.},
|
||||||
|
volume = {6},
|
||||||
|
publisher = {Institute of Mathematical Statistics},
|
||||||
|
issn = {1935-7524},
|
||||||
|
doi = {10.1214/12-ejs753},
|
||||||
|
url = {https://projecteuclid.org/journals/electronic-journal-of-statistics/volume-6/issue-none/Classification-and-estimation-in-the-Stochastic-Blockmodel-based-on-the/10.1214/12-EJS753.full},
|
||||||
|
urldate = {2025-07-09},
|
||||||
|
abstract = {The Stochastic Blockmodel [16] is a mixture model for heterogeneous network data. Unlike the usual statistical framework, new nodes give additional information about the previous ones in this model. Thereby the distribution of the degrees concentrates in points conditionally on the node class. We show under a mild assumption that classification, estimation and model selection can actually be achieved with no more than the empirical degree data. We provide an algorithm able to process very large networks and consistent estimators based on it. In particular, we prove a bound of the probability of misclassification of at least one node, including when the number of classes grows.},
|
||||||
|
issue = {none},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T13:59:33.921Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/8TL8AJ2G/Channarond et al. - 2012 - Classification and estimation in the Stochastic Blockmodel based on the empirical degrees.pdf}
|
||||||
|
}
|
||||||
125
suivi/2025-45/2025-45.qmd
Normal file
|
|
@ -0,0 +1,125 @@
|
||||||
|
---
|
||||||
|
title: "Bilan semaine 45 2025 : 03 novembre - 06 novembre"
|
||||||
|
categories: [colBiSBM, inférence, GNN]
|
||||||
|
date: 2025 11 03
|
||||||
|
date-modified: last-modified
|
||||||
|
bibliography: references.bib
|
||||||
|
---
|
||||||
|
|
||||||
|
## TODO List
|
||||||
|
|
||||||
|
- Finir le papier :
|
||||||
|
- ❓ Fait ? Re-structurer le plan, mon plan, Donnet et Barbillon, échelle méso et comparaison inter réseau et noeuds non partagés.
|
||||||
|
- ✅ Partie Baldock: Ajouter l'ordre des modèles préférés et vérifier mais BICLsep < BICL pirho < BICL iid
|
||||||
|
- ✅ Toutes les simus en annexe. Envoyer Info transfer en annexe et remplacer par Network partitioning
|
||||||
|
- Codes pour le papier :
|
||||||
|
- Nettoyer les scripts
|
||||||
|
- Faire un joli README
|
||||||
|
- ❓Faire des notebooks
|
||||||
|
|
||||||
|
- Maitriser graphtools de Peixoto pour essayer d'utiliser l'arbre taxonomique sur graphe de cooccurrence inféré par SparCC
|
||||||
|
|
||||||
|
- Maitriser SparCC
|
||||||
|
|
||||||
|
- Faire LBM sur niveau taxonomique grossier, initialiser avec le résultat pour un niveau plus fin et ainsi de suite.
|
||||||
|
|
||||||
|
- Clustering unipartite j'ai cassé une fonction de distance à vérifier et réparer
|
||||||
|
|
||||||
|
- Pour clustering de collections sur données ~~réelles~~ :
|
||||||
|
→ L'intuition de Pierre semble être confirmée, les dissimilarités semblent arrêter de varier sensiblement pour de grandes valeurs $(Q_1,Q_2)$.
|
||||||
|
|
||||||
|
- 👶 (délégué à Mona) Clustering sur Doré :
|
||||||
|
- Regarder pour les couples date+nom les études et le nombre de réseaux analysables (Possible demander à Élisa)
|
||||||
|
- ⌛ Chamberlain et al semble intéressant à regarder ! Voir le Rmarkdown
|
||||||
|
- Clusteriser sur la base des noms et voir parmi les réseaux Européens (désagrégés ?)
|
||||||
|
- Si M > 10, alors voir si je retrouve les mêmes résultats que dans les études.
|
||||||
|
|
||||||
|
- Regarder *Largest gap* sur réseaux Doré
|
||||||
|
|
||||||
|
- ⌛ Essayer *clustering* sur `supinfo`
|
||||||
|
- CAH et Kmeans tendent vers faire $K = 13$ clusters sur les supinfos
|
||||||
|
- Enrichir avec des métriques sur les réseaux (nestedness, connectance autres ?)
|
||||||
|
- Demander à Elisa pour la signification des métadonnées
|
||||||
|
- Demander à Elisa une fois vu cohérences de groupe voir pour interprétation écologiques ?
|
||||||
|
- Algo de clustering sur les groupes trouvés
|
||||||
|
|
||||||
|
### Inférence et microbes
|
||||||
|
|
||||||
|
#### Bibliographie: à lire, à faire
|
||||||
|
|
||||||
|
- Lire article multi-niveaux Saint-Clair
|
||||||
|
- ✅ Papier Julie Negative Binomiale
|
||||||
|
- 🆕 🔎 Trouver des papiers:
|
||||||
|
- LBM Negative Binomial
|
||||||
|
- Network inference through sample comparison
|
||||||
|
|
||||||
|
- Idée des groupes sur la base de distance phylogénétique:
|
||||||
|
- En train de comprendre les distances que phyloseq permet de calculer sur notre exemple
|
||||||
|
- En train de lire sur Principal coordinate analysis : https://openplantpathology.github.io/OPP_Workshop_Multivariate/2-MV_PCO.html
|
||||||
|
- Parametric t-SNE pour avoir une unique représentation latente (inconvénient utilise du Deep Learning)
|
||||||
|
- Lire Papier UniFrac
|
||||||
|
|
||||||
|
#### Réflexion
|
||||||
|
|
||||||
|
- easy16s : se renseigner sur
|
||||||
|
- $\alpha$, $\beta$ diversité
|
||||||
|
- Heatmap
|
||||||
|
- Regarder **SPARTA** Rennes
|
||||||
|
- Ecrire et étudier les modèles pour différents niveaux taxonomiques.
|
||||||
|
- 🆕 Regarder NetComi
|
||||||
|
- 🆕 Regarder OneNet car agrégation plus robuste
|
||||||
|
- 🆕 Réfléchir sens d'agréger les données ou de les diviser
|
||||||
|
|
||||||
|
#### Écrire et faire tourner
|
||||||
|
|
||||||
|
- Lancer *colBiSBM* sur $OTU\times Sample$ → problème du chargement en mémoire des données à voir
|
||||||
|
- Lancer *colSBM* sur $OTU\times OTU$
|
||||||
|
- TabNet pratiquer les [exercices](https://github.com/cregouby/Tutoriel_torch)
|
||||||
|
- 🆕 SparCC à différents niveaux
|
||||||
|
- 🆕 SBM à différents niveaux
|
||||||
|
- 🆕⌛ Tree-PLN à différents niveaux
|
||||||
|
|
||||||
|
|
||||||
|
#### Causalité
|
||||||
|
|
||||||
|
Plus sur le temps long, à regarder
|
||||||
|
|
||||||
|
- GT causalité
|
||||||
|
- Daria Bystrova lire présentation @bystrovaCausalDiscovery (Meek rules, V-structure)
|
||||||
|
|
||||||
|
## A discuter
|
||||||
|
|
||||||
|
- 🆕 Voir pour des Réseaux / GDR où aller
|
||||||
|
- 🆕 Chercher des cours à suivre
|
||||||
|
|
||||||
|
## Biblio à faire
|
||||||
|
|
||||||
|
- Regarder Transport optimal graphes bipartite.
|
||||||
|
|
||||||
|
|
||||||
|
## Lectures en cours 📚
|
||||||
|
|
||||||
|
### HDR Vincent Brault
|
||||||
|
|
||||||
|
- ⌛ Chap 2 : Creuser l'idée de maximiser l'énergie libre, très intéressant regarder le critère CARI et lire Robert et al 2021. Actuellement p32 du manuscrit
|
||||||
|
- Chap 3
|
||||||
|
|
||||||
|
### OT
|
||||||
|
- ⌛ @mazeletUnsupervisedLearningOptimal Intéressant pour le transport optimal entre graphes de tailles différentes | Regarder si régularisation entropique ne marche pas bien pour le graphe.
|
||||||
|
- ⌛ @nennaLecture2Entropic Pour comprendre le problème d'OT régularisé pour l'entropie.
|
||||||
|
- ⌛ @nennaLecture1Monge
|
||||||
|
|
||||||
|
### Inférence de graphes
|
||||||
|
|
||||||
|
- ⌛ @aitchisonStatisticalAnalysisCompositional1982a, en cours
|
||||||
|
|
||||||
|
- ❗📖 @payneFiniteMixturesMultivariate2023 sur MixMPLN
|
||||||
|
|
||||||
|
### Causalité
|
||||||
|
|
||||||
|
- ❗📖 @bystrovaCausalDiscovery
|
||||||
|
|
||||||
|
### Largest Gaps
|
||||||
|
|
||||||
|
- ❗📖 @braultFastConsistentAlgorithm2023
|
||||||
|
- ❗📖 @channarondClassificationEstimationStochastic2012 le papier qui introduit le *Largest Gaps*
|
||||||
144
suivi/2025-45/references.bib
Normal file
|
|
@ -0,0 +1,144 @@
|
||||||
|
@article{mazeletUnsupervisedLearningOptimal,
|
||||||
|
title = {Unsupervised {{Learning}} for {{Optimal Transport}} Plan Prediction between Unbalanced Graphs},
|
||||||
|
author = {Mazelet, Sonia and Flamary, Rémi and Thirion, Bertrand},
|
||||||
|
abstract = {Optimal transport between graphs, based on Gromov-Wasserstein and other extensions, is a powerful tool for comparing and aligning graph structures. However, solving the associated non-convex optimization problems is computationally expensive, which limits the scalability of these methods to large graphs. In this work, we present Unbalanced Learning of Optimal Transport (ULOT), a deep learning method that predicts optimal transport plans between two graphs. Our method is trained by minimizing the fused unbalanced Gromov-Wasserstein (FUGW) loss. We propose a novel neural architecture with cross-attention that is conditioned on the FUGW tradeoff hyperparameters. We evaluate ULOT on synthetic stochastic block model (SBM) graphs and on real cortical surface data obtained from fMRI. ULOT predicts transport plans with competitive loss up to two orders of magnitude faster than classical solvers. Furthermore, the predicted plan can be used as a warm start for classical solvers to accelerate their convergence. Finally, the predicted transport plan is fully differentiable with respect to the graph inputs and FUGW hyperparameters, enabling the optimization of functionals of the ULOT plan.},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-11T09:08:09.864Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/HPZEYMM9/Mazelet et al. - Unsupervised Learning for Optimal Transport plan prediction between unbalanced graphs.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{nennaLecture2Entropic,
|
||||||
|
title = {Lecture 2: {{Entropic Optimal Transport}}},
|
||||||
|
author = {Nenna, Luca},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-11T16:06:28.547Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/WGFIISDB/Nenna - Lecture 2 Entropic Optimal Transport.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{nennaLecture1Monge,
|
||||||
|
title = {Lecture 1 {{Monge}} and {{Kantorovich}} Problems: From Primal to Dual},
|
||||||
|
author = {Nenna, Luca},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-13T09:24:13.832Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/7LVQPD6D/Nenna - Lecture 1 Monge and Kantorovich problems from primal to dual.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{Morton2021.11.09.467939,
|
||||||
|
title = {Scalable Estimation of Microbial Co-Occurrence Networks with {{Variational Autoencoders}}},
|
||||||
|
author = {Morton, James T. and Silverman, Justin and Tikhonov, Gleb and Lähdesmäki, Harri and Bonneau, Rich},
|
||||||
|
date = {2021},
|
||||||
|
journaltitle = {bioRxiv : the preprint server for biology},
|
||||||
|
shortjournal = {bioRxiv},
|
||||||
|
eprint = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939.full.pdf},
|
||||||
|
publisher = {Cold Spring Harbor Laboratory},
|
||||||
|
doi = {10.1101/2021.11.09.467939},
|
||||||
|
url = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939},
|
||||||
|
abstract = {Estimating microbe-microbe interactions is critical for understanding the ecological laws governing microbial communities. Rapidly decreasing sequencing costs have promised new opportunities to estimate microbe-microbe interactions across thousands of uncultured, unknown microbes. However, typical microbiome datasets are very high dimensional and accurate estimation of microbial correlations requires tens of thousands of samples, exceeding the computational capabilities of existing methodologies. Furthermore, the vast majority of microbiome studies collect compositional metagenomics data which enforces a negative bias when computing microbe-microbe correlations. The Multinomial Logistic Normal (MLN) distribution has been shown to be effective at inferring microbe-microbe correlations, however scalable Bayesian inference of these distributions has remained elusive. Here, we show that carefully constructed Variational Autoencoders (VAEs) augmented with the Isometric Log-ratio (ILR) transform can estimate low-rank MLN distributions thousands of times faster than existing methods. These VAEs can be trained on tens of thousands of samples, enabling co-occurrence inference across tens of thousands of microbes without regularization. The latent embedding distances computed from these VAEs are competitive with existing beta-diversity methods across a variety of mouse and human microbiome classification and regression tasks, with notable improvements on longitudinal studies. Competing Interest Statement: The authors have declared no competing interest.},
|
||||||
|
elocation-id = {2021.11.09.467939},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-30T14:17:29.518Z}
|
||||||
|
}
|
||||||
|
@article{aitchisonStatisticalAnalysisCompositional1982a,
|
||||||
|
title = {The {{Statistical Analysis}} of {{Compositional Data}}},
|
||||||
|
author = {Aitchison, J.},
|
||||||
|
date = {1982},
|
||||||
|
journaltitle = {Journal of the Royal Statistical Society. Series B (Methodological)},
|
||||||
|
volume = {44},
|
||||||
|
number = {2},
|
||||||
|
eprint = {2345821},
|
||||||
|
eprinttype = {jstor},
|
||||||
|
pages = {139--177},
|
||||||
|
publisher = {[Royal Statistical Society, Oxford University Press]},
|
||||||
|
issn = {0035-9246},
|
||||||
|
url = {https://www.jstor.org/stable/2345821},
|
||||||
|
urldate = {2025-05-07},
|
||||||
|
abstract = {The simplex plays an important role as sample space in many practical situations where compositional data, in the form of proportions of some whole, require interpretation. It is argued that the statistical analysis of such data has proved difficult because of a lack both of concepts of independence and of rich enough parametric classes of distributions in the simplex. A variety of independence hypotheses are introduced and interrelated, and new classes of transformed-normal distributions in the simplex are provided as models within which the independence hypotheses can be tested through standard theory of parametric hypothesis testing. The new concepts and statistical methodology are illustrated by a number of applications.},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-05-07T07:43:38.485Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/S97URH4Y/Aitchison - 1982 - The Statistical Analysis of Compositional Data.pdf}
|
||||||
|
}
|
||||||
|
@online{payneFiniteMixturesMultivariate2023,
|
||||||
|
title = {Finite {{Mixtures}} of {{Multivariate Poisson-Log Normal Factor Analyzers}} for {{Clustering Count Data}}},
|
||||||
|
author = {Payne, Andrea and Silva, Anjali and Rothstein, Steven J. and McNicholas, Paul D. and Subedi, Sanjeena},
|
||||||
|
date = {2023-11-13},
|
||||||
|
eprint = {2311.07762},
|
||||||
|
eprinttype = {arXiv},
|
||||||
|
eprintclass = {stat},
|
||||||
|
doi = {10.48550/arXiv.2311.07762},
|
||||||
|
url = {http://arxiv.org/abs/2311.07762},
|
||||||
|
urldate = {2025-07-02},
|
||||||
|
abstract = {A mixture of multivariate Poisson-log normal factor analyzers is introduced by imposing constraints on the covariance matrix, which resulted in flexible models for clustering purposes. In particular, a class of eight parsimonious mixture models based on the mixtures of factor analyzers model are introduced. Variational Gaussian approximation is used for parameter estimation, and information criteria are used for model selection. The proposed models are explored in the context of clustering discrete data arising from RNA sequencing studies. Using real and simulated data, the models are shown to give favourable clustering performance. The GitHub R package for this work is available at https://github.com/anjalisilva/mixMPLNFA and is released under the open-source MIT license.},
|
||||||
|
pubstate = {prepublished},
|
||||||
|
keywords = {/unread,Statistics - Computation,Statistics - Machine Learning,Statistics - Methodology},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-02T09:31:47.579Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/BXVPEIDD/Payne et al. - 2023 - Finite Mixtures of Multivariate Poisson-Log Normal Factor Analyzers for Clustering Count Data.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/L5DAS5C2/2311.html}
|
||||||
|
}
|
||||||
|
@unpublished{bystrovaCausalDiscovery,
|
||||||
|
title = {Causal Discovery},
|
||||||
|
author = {Bystrova, Daria},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-02T09:34:39.476Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/NQE5DY92/Bystrova - Causal discovery.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@online{braultFastConsistentAlgorithm2023,
|
||||||
|
title = {Fast and {{Consistent Algorithm}} for the {{Latent Block Model}}},
|
||||||
|
author = {Brault, Vincent and Channarond, Antoine},
|
||||||
|
date = {2023-03-09},
|
||||||
|
eprint = {1610.09005},
|
||||||
|
eprinttype = {arXiv},
|
||||||
|
eprintclass = {math},
|
||||||
|
doi = {10.48550/arXiv.1610.09005},
|
||||||
|
url = {http://arxiv.org/abs/1610.09005},
|
||||||
|
urldate = {2025-07-09},
|
||||||
|
abstract = {The latent block model is used to simultaneously rank the rows and columns of a matrix to reveal a block structure. The algorithms used for estimation are often time consuming. However, recent work shows that the log-likelihood ratios are equivalent under the complete and observed (with unknown labels) models and the groups posterior distribution to converge as the size of the data increases to a Dirac mass located at the actual groups configuration. Based on these observations, the algorithm Largest Gaps is proposed in this paper to perform clustering using only the marginals of the matrix, when the number of blocks is very small with respect to the size of the whole matrix in the case of binary data. In addition, a model selection method is incorporated with a proof of its consistency. Thus, this paper shows that studying simplistic configurations (few blocks compared to the size of the matrix or very contrasting blocks) with complex algorithms is useless since the marginals already give very good parameter and classification estimates.},
|
||||||
|
langid = {english},
|
||||||
|
pubstate = {prepublished},
|
||||||
|
keywords = {/unread,Mathematics - Statistics Theory,Statistics - Computation,Statistics - Statistics Theory},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T13:58:53.533Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/5LXC6Y68/Brault et Channarond - 2023 - Fast and Consistent Algorithm for the Latent Block Model.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{braultGeneralisationLalgorithmeLargest,
|
||||||
|
title = {Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique},
|
||||||
|
author = {Brault, Vincent and Channarond, Antoine and Robert, Valérie},
|
||||||
|
abstract = {The latent block model assumes there exists a distribution for each crossing between an object cluster and a variable cluster of a data table ; the cells are supposed to be independent conditionally to the choice of these clusters. To estimate the model parameters, most of algorithms are time consuming. Brault and Channarond (2016) proposed to adapt the Largest Gaps algorithm which consists in using the margins. They thus obtained a procedure which estimates all the model parameters consistently but requires a large number of observations. In this talk, we will extend the procedure to the case of any distribution having a second order moment by using an EM algorithm estimation.},
|
||||||
|
langid = {french},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T12:29:43.098Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/GIUNC4L3/Brault et al. - Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{channarondClassificationEstimationStochastic2012,
|
||||||
|
title = {Classification and Estimation in the {{Stochastic Blockmodel}} Based on the Empirical Degrees},
|
||||||
|
author = {Channarond, Antoine and Daudin, Jean-Jacques and Robin, Stéphane},
|
||||||
|
date = {2012-01-01},
|
||||||
|
journaltitle = {Electronic Journal of Statistics},
|
||||||
|
shortjournal = {Electron. J. Statist.},
|
||||||
|
volume = {6},
|
||||||
|
publisher = {Institute of Mathematical Statistics},
|
||||||
|
issn = {1935-7524},
|
||||||
|
doi = {10.1214/12-ejs753},
|
||||||
|
url = {https://projecteuclid.org/journals/electronic-journal-of-statistics/volume-6/issue-none/Classification-and-estimation-in-the-Stochastic-Blockmodel-based-on-the/10.1214/12-EJS753.full},
|
||||||
|
urldate = {2025-07-09},
|
||||||
|
abstract = {The Stochastic Blockmodel [16] is a mixture model for heterogeneous network data. Unlike the usual statistical framework, new nodes give additional information about the previous ones in this model. Thereby the distribution of the degrees concentrates in points conditionally on the node class. We show under a mild assumption that classification, estimation and model selection can actually be achieved with no more than the empirical degree data. We provide an algorithm able to process very large networks and consistent estimators based on it. In particular, we prove a bound of the probability of misclassification of at least one node, including when the number of classes grows.},
|
||||||
|
issue = {none},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T13:59:33.921Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/8TL8AJ2G/Channarond et al. - 2012 - Classification and estimation in the Stochastic Blockmodel based on the empirical degrees.pdf}
|
||||||
|
}
|
||||||
185
suivi/2025-50/2025-50.qmd
Normal file
|
|
@ -0,0 +1,185 @@
|
||||||
|
---
|
||||||
|
title: "Bilan semaine 50 2025 : 08 décembre - 12 décembre"
|
||||||
|
categories: [colBiSBM, inférence, GNN]
|
||||||
|
date: 2025-12-12
|
||||||
|
date-modified: last-modified
|
||||||
|
bibliography: references.bib
|
||||||
|
# from: markdown+latex_macros
|
||||||
|
---
|
||||||
|
{{< include /_macros.tex >}}
|
||||||
|
|
||||||
|
## TODO List
|
||||||
|
|
||||||
|
- ⚠️ IL Y A UNE TYPO SUR LE SIGNE DE L'ENTROPIE POUR LE PAPIER: $- \mathcal{H}$ au lieu de $+\mathcal{H}$
|
||||||
|
|
||||||
|
- Codes pour le papier :
|
||||||
|
- Nettoyer les scripts
|
||||||
|
- Faire un joli README
|
||||||
|
- ❓Faire des notebooks
|
||||||
|
|
||||||
|
- Faire tourner clustering sur Trojelsgaard
|
||||||
|
|
||||||
|
- Réussir à reproduire résultat de @abramovStructureKnowsBest
|
||||||
|
|
||||||
|
- Maîtriser graph-tool de Peixoto pour essayer d'utiliser l'arbre taxonomique sur le graphe de cooccurrence inféré par SparCC
|
||||||
|
|
||||||
|
- Maitriser SparCC
|
||||||
|
|
||||||
|
- Faire LBM sur niveau taxonomique grossier, initialiser avec le résultat pour un niveau plus fin et ainsi de suite.
|
||||||
|
|
||||||
|
- Clustering unipartite : j'ai cassé une fonction de distance, à vérifier et réparer
|
||||||
|
|
||||||
|
- Pour clustering de collections sur données ~~réelles~~ :
|
||||||
|
→ L'intuition de Pierre semble être confirmée, les dissimilarités semblent arrêter de varier sensiblement pour de grandes valeurs de $(Q_1,Q_2)$.
|
||||||
|
|
||||||
|
- 👶 (délégué à Mona) Clustering sur Doré :
|
||||||
|
- Regarder pour les couples date+nom les études et le nombre de réseaux analysables (Possible demander à Élisa)
|
||||||
|
- ⌛ Chamberlain et al semble intéressant à regarder ! Voir le Rmarkdown
|
||||||
|
- Clusteriser sur la base des noms et voir parmi les réseaux Européens (désagrégés ?)
|
||||||
|
- Si M > 10, alors voir si je retrouve les mêmes résultats que dans les études.
|
||||||
|
|
||||||
|
- Regarder *Largest gap* sur réseaux Doré
|
||||||
|
|
||||||
|
- ⌛ Essayer *clustering* sur `supinfo`
|
||||||
|
- CAH et Kmeans tendent à faire $K = 13$ clusters sur les supinfos
|
||||||
|
- Enrichir avec des métriques sur les réseaux (nestedness, connectance autres ?)
|
||||||
|
- Demander à Élisa la signification des métadonnées
|
||||||
|
- Une fois les cohérences de groupe vues, demander à Élisa pour des interprétations écologiques ?
|
||||||
|
- Algo de clustering sur les groupes trouvés
|
||||||
|
|
||||||
|
### Inférence et microbes
|
||||||
|
|
||||||
|
#### Modèle avec covariables sur probas d'appartenances aux groupes
|
||||||
|
|
||||||
|
Toujours modèle LBM mais avec probas d'appartenance pour les colonnes variables:
|
||||||
|
|
||||||
|
\begin{align*}
|
||||||
|
Z_i &\sim \mathcal{M}(1; \pi_1, \dots, \pi_Q), \sum_{q=1}^{Q} \pi_q = 1\\
|
||||||
|
W_j &\sim \mathcal{M}(1; \rho_1^j, \dots, \rho_R^j), \sum_{r=1}^{R} \rho_r^j = 1\\
|
||||||
|
Y_{i,j}&\mid Z_i = q, W_j = r \sim \mathcal{F}(\alpha_{qr})
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
Inférence variationnelle donc $\ell(Y;\pmb{\theta}) \geq \mathcal{J}(\mathcal{R},\pmb{\theta})$ avec
|
||||||
|
|
||||||
|
$$
|
||||||
|
\ELBORTheta = \sum_{i = 1}^{n_1}\sum_{j=1}^{n_2}\sum_{q \in \mathcal{Q}_1} \sum_{r \in \mathcal{Q}_2} \tau_{iq}^{1} \tau_{jr}^{2} \log f(Y_{ij}; \alpha_{qr})
|
||||||
|
+ \sum_{i=1}^{n_1} \sum_{q \in \mathcal{Q}_1} \tau_{iq}^{1} \log \pi_{\color{black}q} + \sum_{j=1}^{n_2} \sum_{r \in \mathcal{Q}_2} \tau_{jr}^{2} \log \rho_{\color{black}r}^{j} \\
|
||||||
|
- \sum_{i=1}^{n_1} \sum_{q \in \mathcal{Q}_1} \tau_{iq}^{1} \log \tau_{iq}^{1} - \sum_{j=1}^{n_2} \sum_{r \in \mathcal{Q}_2} \tau_{jr}^{2} \log \tau_{jr}^{2}
|
||||||
|
$$
|
||||||
|
|
||||||
|
Plusieurs possibilités pour la définition de $\rho_r^j$
|
||||||
|
|
||||||
|
##### Modèle 1 (Tabouy)
|
||||||
|
|
||||||
|
Dénominateur pas correct, ne somme pas à 1.
|
||||||
|
|
||||||
|
$\rho_r^j = \frac{\exp{\beta_r X_j\mathbf{1}_{\{r\neq R\}}}}{1+\sum_{s=1}^{R-1} \beta_s X_j}, \beta_R = 0$ et $\rho_R^{j} = \frac{1}{1+\sum_{s=1}^{R-1} \beta_s X_j}$ (pas de compréhension intuitive)
|
||||||
|
|
||||||
|
La partie pertinente de l'ELBO devient:
|
||||||
|
$$
|
||||||
|
P((\beta_r)_{r=1,\dots,R}, (X_j)_{j=1,\dots,n_2}, (\tau_{jr})_{\substack{j=1,\dots,n_2\\r=1,\dots,R}} ) = \sum_{j=1}^{n_2} \sum_{r=1}^{R} [\tau_{jr} (\beta_r X_j \mathbf{1}_{\{r\neq R\}} - \log (1+\sum_{s=1}^{R-1} \beta_s X_j))]
|
||||||
|
$$
|
||||||
|
|
||||||
|
Et on obtient la dérivée partielle par rapport à $\beta_t$ comme:
|
||||||
|
\begin{align*}
|
||||||
|
\dfrac{\partial P}{\partial \beta_t}&((\beta_r)_{r=1,\dots,R}, (X_j)_{j=1,\dots,n_2}, (\tau_{jr})_{\substack{j=1,\dots,n_2\\r=1,\dots,R}} ) = \sum_{j=1}^{n_2} \biggl[ \tau_{jt} X_j - \frac{X_j}{1+\sum_{s=1}^{R-1} \beta_s X_j} \biggr]\\
|
||||||
|
& = \sum_{j=1}^{n_2} \biggl[\bigl(\tau_{jt} - \frac{1}{1+\sum_{s=1}^{R-1} \beta_s X_j} \bigr) X_j\biggr] = \sum_{j=1}^{n_2} \biggl[\bigl(\tau_{jt} - \rho_R^j \bigr) X_j\biggr]
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
❓ Gradient mesure l'écart entre probas a posteriori et la proba a priori du groupe de référence ?
|
||||||
|
|
||||||
|
**Conclusion** : il manque l'exponentielle au dénominateur, donc cette formulation ne somme pas à 1.
|
||||||
|
|
||||||
|
##### Modèle Sophie
|
||||||
|
|
||||||
|
Avec $\rho_r^j = \frac{\exp{\beta_r X_j}}{\sum_{s=1}^{R} \exp{\beta_s X_j}} = \sigma(\pmb{\beta} \pmb{X})_{r,j}$, où $\sigma$ désigne le softmax. Mais il y a besoin de poser une contrainte sur l'un des $(\beta_r)_{r=1,\dots,R}$, ici $\beta_R = 0$.
|
||||||
|
|
||||||
|
La partie pertinente de l'ELBO devient:
|
||||||
|
$$
|
||||||
|
P((\beta_r)_{r=1,\dots,R}, (X_j)_{j=1,\dots,n_2}, (\tau_{jr})_{\substack{j=1,\dots,n_2\\r=1,\dots,R}} ) = \sum_{j=1}^{n_2} \sum_{r=1}^{R} [\tau_{jr} (\beta_r X_j - \log (\sum_{s=1}^{R} \exp{\beta_s X_j}))]
|
||||||
|
$$
|
||||||
|
|
||||||
|
Et on obtient la dérivée partielle par rapport à $\beta_t$ comme:
|
||||||
|
\begin{align*}
|
||||||
|
\dfrac{\partial P}{\partial \beta_t}&((\beta_r)_{r=1,\dots,R}, (X_j)_{j=1,\dots,n_2}, (\tau_{jr})_{\substack{j=1,\dots,n_2\\r=1,\dots,R}} ) = \sum_{j=1}^{n_2} \biggl[ \tau_{jt} X_j - \frac{X_j \exp{\beta_t X_j}}{\sum_{s=1}^{R} \exp{\beta_s X_j}} \biggr]\\
|
||||||
|
& = \sum_{j=1}^{n_2} \biggl[\bigl(\tau_{jt} - \sigma(\pmb{\beta} \pmb{X})_{t,j}\bigr) X_j\biggr] = \sum_{j=1}^{n_2} \biggl[\bigl(\tau_{jt} - \rho_t^j \bigr) X_j\biggr]
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
|
||||||
|
#### Bibliographie: à lire, à faire
|
||||||
|
|
||||||
|
- Lire article multi-niveaux Saint-Clair
|
||||||
|
- 🆕 🔎 Trouver des papiers:
|
||||||
|
- LBM Negative Binomial
|
||||||
|
- Network inference through sample comparison
|
||||||
|
|
||||||
|
- Idée des groupes sur la base de distance phylogénétique:
|
||||||
|
- En train de comprendre les distances que phyloseq permet de calculer sur notre exemple
|
||||||
|
- En train de lire sur Principal coordinate analysis : https://openplantpathology.github.io/OPP_Workshop_Multivariate/2-MV_PCO.html
|
||||||
|
- Parametric t-SNE pour avoir une unique représentation latente (inconvénient utilise du Deep Learning)
|
||||||
|
- Lire Papier UniFrac
|
||||||
|
|
||||||
|
#### Réflexion
|
||||||
|
|
||||||
|
- easy16s : se renseigner sur
|
||||||
|
- $\alpha$, $\beta$ diversité
|
||||||
|
- Heatmap
|
||||||
|
- Regarder **SPARTA** Rennes
|
||||||
|
- Ecrire et étudier les modèles pour différents niveaux taxonomiques.
|
||||||
|
- 🆕 Regarder NetCoMi
|
||||||
|
- 🆕 Regarder OneNet car agrégation plus robuste
|
||||||
|
- 🆕 Réfléchir au sens d'agréger les données ou de les diviser
|
||||||
|
|
||||||
|
#### Écrire et faire tourner
|
||||||
|
|
||||||
|
- Lancer *colBiSBM* sur $OTU\times Sample$ → problème du chargement en mémoire des données à voir
|
||||||
|
- Lancer *colSBM* sur $OTU\times OTU$
|
||||||
|
- TabNet pratiquer les [exercices](https://github.com/cregouby/Tutoriel_torch)
|
||||||
|
- 🆕 SparCC à différents niveaux
|
||||||
|
- 🆕 SBM à différents niveaux
|
||||||
|
- 🆕⌛ Tree-PLN à différents niveaux
|
||||||
|
|
||||||
|
|
||||||
|
#### Causalité
|
||||||
|
|
||||||
|
Plus sur le temps long, à regarder
|
||||||
|
|
||||||
|
- GT causalité
|
||||||
|
- Daria Bystrova lire présentation @bystrovaCausalDiscovery (Meek rules, V-structure)
|
||||||
|
|
||||||
|
## A discuter
|
||||||
|
|
||||||
|
- 🆕 Voir pour des Réseaux / GDR où aller
|
||||||
|
- 🆕 Chercher des cours à suivre
|
||||||
|
|
||||||
|
## Biblio à faire
|
||||||
|
|
||||||
|
- Regarder Transport optimal pour graphes bipartites.
|
||||||
|
|
||||||
|
|
||||||
|
## Lectures en cours 📚
|
||||||
|
|
||||||
|
### HDR Vincent Brault
|
||||||
|
|
||||||
|
- ⌛ Chap 2 : Creuser l'idée de maximiser l'énergie libre, très intéressant regarder le critère CARI et lire Robert et al 2021. Actuellement p32 du manuscrit
|
||||||
|
- Chap 3
|
||||||
|
|
||||||
|
### OT
|
||||||
|
- ⌛ @mazeletUnsupervisedLearningOptimal Intéressant pour le transport optimal entre graphes de tailles différentes | Regarder si la régularisation entropique ne marche pas bien pour le graphe.
|
||||||
|
- ⌛ @nennaLecture2Entropic Pour comprendre le problème d'OT régularisé par l'entropie.
|
||||||
|
- ⌛ @nennaLecture1Monge
|
||||||
|
|
||||||
|
### Inférence de graphes
|
||||||
|
|
||||||
|
- ⌛ @aitchisonStatisticalAnalysisCompositional1982a, en cours
|
||||||
|
|
||||||
|
- ❗📖 @payneFiniteMixturesMultivariate2023 sur MixMPLN
|
||||||
|
|
||||||
|
### Causalité
|
||||||
|
|
||||||
|
- ❗📖 @bystrovaCausalDiscovery
|
||||||
|
|
||||||
|
### Largest Gaps
|
||||||
|
|
||||||
|
- ❗📖 @braultFastConsistentAlgorithm2023
|
||||||
|
- ❗📖 @channarondClassificationEstimationStochastic2012 le papier qui introduit le *Largest Gaps*
|
||||||
155
suivi/2025-50/references.bib
Normal file
|
|
@ -0,0 +1,155 @@
|
||||||
|
@article{mazeletUnsupervisedLearningOptimal,
|
||||||
|
title = {Unsupervised {{Learning}} for {{Optimal Transport}} Plan Prediction between Unbalanced Graphs},
|
||||||
|
author = {Mazelet, Sonia and Flamary, Rémi and Thirion, Bertrand},
|
||||||
|
abstract = {Optimal transport between graphs, based on Gromov-Wasserstein and other extensions, is a powerful tool for comparing and aligning graph structures. However, solving the associated non-convex optimization problems is computationally expensive, which limits the scalability of these methods to large graphs. In this work, we present Unbalanced Learning of Optimal Transport (ULOT), a deep learning method that predicts optimal transport plans between two graphs. Our method is trained by minimizing the fused unbalanced Gromov-Wasserstein (FUGW) loss. We propose a novel neural architecture with cross-attention that is conditioned on the FUGW tradeoff hyperparameters. We evaluate ULOT on synthetic stochastic block model (SBM) graphs and on real cortical surface data obtained from fMRI. ULOT predicts transport plans with competitive loss up to two orders of magnitude faster than classical solvers. Furthermore, the predicted plan can be used as a warm start for classical solvers to accelerate their convergence. Finally, the predicted transport plan is fully differentiable with respect to the graph inputs and FUGW hyperparameters, enabling the optimization of functionals of the ULOT plan.},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-11T09:08:09.864Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/HPZEYMM9/Mazelet et al. - Unsupervised Learning for Optimal Transport plan prediction between unbalanced graphs.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{abramovStructureKnowsBest,
|
||||||
|
title = {Structure Knows Best: Predicting Ecological Interactions across Space through Pairwise Integration of Latent Network Patterns},
|
||||||
|
author = {Abramov, Kesem and Biton, Barry and Galai, Geut and Puzis, Rami and Pilosof, Shai},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-12-01T08:50:29.812Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/DX44VXQA/Abramov et al. - Structure knows best predicting ecological interactions across space through pairwise integration o.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@article{nennaLecture2Entropic,
|
||||||
|
title = {Lecture 2: {{Entropic Optimal Transport}}},
|
||||||
|
author = {Nenna, Luca},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-11T16:06:28.547Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/WGFIISDB/Nenna - Lecture 2 Entropic Optimal Transport.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{nennaLecture1Monge,
|
||||||
|
title = {Lecture 1 {{Monge}} and {{Kantorovich}} Problems: From Primal to Dual},
|
||||||
|
author = {Nenna, Luca},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-13T09:24:13.832Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/7LVQPD6D/Nenna - Lecture 1 Monge and Kantorovich problems from primal to dual.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{Morton2021.11.09.467939,
|
||||||
|
title = {Scalable Estimation of Microbial Co-Occurrence Networks with {{Variational Autoencoders}}},
|
||||||
|
author = {Morton, James T. and Silverman, Justin and Tikhonov, Gleb and Lähdesmäki, Harri and Bonneau, Rich},
|
||||||
|
date = {2021},
|
||||||
|
journaltitle = {bioRxiv : the preprint server for biology},
|
||||||
|
shortjournal = {bioRxiv},
|
||||||
|
eprint = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939.full.pdf},
|
||||||
|
publisher = {Cold Spring Harbor Laboratory},
|
||||||
|
doi = {10.1101/2021.11.09.467939},
|
||||||
|
url = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939},
|
||||||
|
abstract = {Estimating microbe-microbe interactions is critical for understanding the ecological laws governing microbial communities. Rapidly decreasing sequencing costs have promised new opportunities to estimate microbe-microbe interactions across thousands of uncultured, unknown microbes. However, typical microbiome datasets are very high dimensional and accurate estimation of microbial correlations requires tens of thousands of samples, exceeding the computational capabilities of existing methodologies. Furthermore, the vast majority of microbiome studies collect compositional metagenomics data which enforces a negative bias when computing microbe-microbe correlations. The Multinomial Logistic Normal (MLN) distribution has been shown to be effective at inferring microbe-microbe correlations, however scalable Bayesian inference of these distributions has remained elusive. Here, we show that carefully constructed Variational Autoencoders (VAEs) augmented with the Isometric Log-ratio (ILR) transform can estimate low-rank MLN distributions thousands of times faster than existing methods. These VAEs can be trained on tens of thousands of samples, enabling co-occurrence inference across tens of thousands of microbes without regularization. The latent embedding distances computed from these VAEs are competitive with existing beta-diversity methods across a variety of mouse and human microbiome classification and regression tasks, with notable improvements on longitudinal studies. Competing Interest Statement: The authors have declared no competing interest.},
|
||||||
|
elocation-id = {2021.11.09.467939},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-30T14:17:29.518Z}
|
||||||
|
}
|
||||||
|
@article{aitchisonStatisticalAnalysisCompositional1982a,
|
||||||
|
title = {The {{Statistical Analysis}} of {{Compositional Data}}},
|
||||||
|
author = {Aitchison, J.},
|
||||||
|
date = {1982},
|
||||||
|
journaltitle = {Journal of the Royal Statistical Society. Series B (Methodological)},
|
||||||
|
volume = {44},
|
||||||
|
number = {2},
|
||||||
|
eprint = {2345821},
|
||||||
|
eprinttype = {jstor},
|
||||||
|
pages = {139--177},
|
||||||
|
publisher = {[Royal Statistical Society, Oxford University Press]},
|
||||||
|
issn = {0035-9246},
|
||||||
|
url = {https://www.jstor.org/stable/2345821},
|
||||||
|
urldate = {2025-05-07},
|
||||||
|
abstract = {The simplex plays an important role as sample space in many practical situations where compositional data, in the form of proportions of some whole, require interpretation. It is argued that the statistical analysis of such data has proved difficult because of a lack both of concepts of independence and of rich enough parametric classes of distributions in the simplex. A variety of independence hypotheses are introduced and interrelated, and new classes of transformed-normal distributions in the simplex are provided as models within which the independence hypotheses can be tested through standard theory of parametric hypothesis testing. The new concepts and statistical methodology are illustrated by a number of applications.},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-05-07T07:43:38.485Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/S97URH4Y/Aitchison - 1982 - The Statistical Analysis of Compositional Data.pdf}
|
||||||
|
}
|
||||||
|
@online{payneFiniteMixturesMultivariate2023,
|
||||||
|
title = {Finite {{Mixtures}} of {{Multivariate Poisson-Log Normal Factor Analyzers}} for {{Clustering Count Data}}},
|
||||||
|
author = {Payne, Andrea and Silva, Anjali and Rothstein, Steven J. and McNicholas, Paul D. and Subedi, Sanjeena},
|
||||||
|
date = {2023-11-13},
|
||||||
|
eprint = {2311.07762},
|
||||||
|
eprinttype = {arXiv},
|
||||||
|
eprintclass = {stat},
|
||||||
|
doi = {10.48550/arXiv.2311.07762},
|
||||||
|
url = {http://arxiv.org/abs/2311.07762},
|
||||||
|
urldate = {2025-07-02},
|
||||||
|
abstract = {A mixture of multivariate Poisson-log normal factor analyzers is introduced by imposing constraints on the covariance matrix, which resulted in flexible models for clustering purposes. In particular, a class of eight parsimonious mixture models based on the mixtures of factor analyzers model are introduced. Variational Gaussian approximation is used for parameter estimation, and information criteria are used for model selection. The proposed models are explored in the context of clustering discrete data arising from RNA sequencing studies. Using real and simulated data, the models are shown to give favourable clustering performance. The GitHub R package for this work is available at https://github.com/anjalisilva/mixMPLNFA and is released under the open-source MIT license.},
|
||||||
|
pubstate = {prepublished},
|
||||||
|
keywords = {/unread,Statistics - Computation,Statistics - Machine Learning,Statistics - Methodology},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-02T09:31:47.579Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/BXVPEIDD/Payne et al. - 2023 - Finite Mixtures of Multivariate Poisson-Log Normal Factor Analyzers for Clustering Count Data.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/L5DAS5C2/2311.html}
|
||||||
|
}
|
||||||
|
@unpublished{bystrovaCausalDiscovery,
|
||||||
|
title = {Causal Discovery},
|
||||||
|
author = {Bystrova, Daria},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-02T09:34:39.476Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/NQE5DY92/Bystrova - Causal discovery.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@online{braultFastConsistentAlgorithm2023,
|
||||||
|
title = {Fast and {{Consistent Algorithm}} for the {{Latent Block Model}}},
|
||||||
|
author = {Brault, Vincent and Channarond, Antoine},
|
||||||
|
date = {2023-03-09},
|
||||||
|
eprint = {1610.09005},
|
||||||
|
eprinttype = {arXiv},
|
||||||
|
eprintclass = {math},
|
||||||
|
doi = {10.48550/arXiv.1610.09005},
|
||||||
|
url = {http://arxiv.org/abs/1610.09005},
|
||||||
|
urldate = {2025-07-09},
|
||||||
|
abstract = {The latent block model is used to simultaneously rank the rows and columns of a matrix to reveal a block structure. The algorithms used for estimation are often time consuming. However, recent work shows that the log-likelihood ratios are equivalent under the complete and observed (with unknown labels) models and the groups posterior distribution to converge as the size of the data increases to a Dirac mass located at the actual groups configuration. Based on these observations, the algorithm Largest Gaps is proposed in this paper to perform clustering using only the marginals of the matrix, when the number of blocks is very small with respect to the size of the whole matrix in the case of binary data. In addition, a model selection method is incorporated with a proof of its consistency. Thus, this paper shows that studying simplistic configurations (few blocks compared to the size of the matrix or very contrasting blocks) with complex algorithms is useless since the marginals already give very good parameter and classification estimates.},
|
||||||
|
langid = {english},
|
||||||
|
pubstate = {prepublished},
|
||||||
|
keywords = {/unread,Mathematics - Statistics Theory,Statistics - Computation,Statistics - Statistics Theory},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T13:58:53.533Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/5LXC6Y68/Brault et Channarond - 2023 - Fast and Consistent Algorithm for the Latent Block Model.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{braultGeneralisationLalgorithmeLargest,
|
||||||
|
title = {Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique},
|
||||||
|
author = {Brault, Vincent and Channarond, Antoine and Robert, Valérie},
|
||||||
|
abstract = {The latent block model assumes there exists a distribution for each crossing between an object cluster and a variable cluster of a data table ; the cells are supposed to be independent conditionally to the choice of these clusters. To estimate the model parameters, most of algorithms are time consuming. Brault and Channarond (2016) proposed to adapt the Largest Gaps algorithm which consists in using the margins. They thus obtained a procedure which estimates all the model parameters consistently but requires a large number of observations. In this talk, we will extend the procedure to the case of any distribution having a second order moment by using an EM algorithm estimation.},
|
||||||
|
langid = {french},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T12:29:43.098Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/GIUNC4L3/Brault et al. - Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{channarondClassificationEstimationStochastic2012,
|
||||||
|
title = {Classification and Estimation in the {{Stochastic Blockmodel}} Based on the Empirical Degrees},
|
||||||
|
author = {Channarond, Antoine and Daudin, Jean-Jacques and Robin, Stéphane},
|
||||||
|
date = {2012-01-01},
|
||||||
|
journaltitle = {Electronic Journal of Statistics},
|
||||||
|
shortjournal = {Electron. J. Statist.},
|
||||||
|
volume = {6},
|
||||||
|
publisher = {Institute of Mathematical Statistics},
|
||||||
|
issn = {1935-7524},
|
||||||
|
doi = {10.1214/12-ejs753},
|
||||||
|
url = {https://projecteuclid.org/journals/electronic-journal-of-statistics/volume-6/issue-none/Classification-and-estimation-in-the-Stochastic-Blockmodel-based-on-the/10.1214/12-EJS753.full},
|
||||||
|
urldate = {2025-07-09},
|
||||||
|
abstract = {The Stochastic Blockmodel [16] is a mixture model for heterogeneous network data. Unlike the usual statistical framework, new nodes give additional information about the previous ones in this model. Thereby the distribution of the degrees concentrates in points conditionally on the node class. We show under a mild assumption that classification, estimation and model selection can actually be achieved with no more than the empirical degree data. We provide an algorithm able to process very large networks and consistent estimators based on it. In particular, we prove a bound of the probability of misclassification of at least one node, including when the number of classes grows.},
|
||||||
|
issue = {none},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T13:59:33.921Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/8TL8AJ2G/Channarond et al. - 2012 - Classification and estimation in the Stochastic Blockmodel based on the empirical degrees.pdf}
|
||||||
|
}
|
||||||
334
suivi/2025-51/2025-51.qmd
Normal file
|
|
@ -0,0 +1,334 @@
|
||||||
|
---
|
||||||
|
title: "Bilan semaine 51 2025 : 15 décembre - 19 décembre"
|
||||||
|
categories: [colBiSBM, inférence, GNN]
|
||||||
|
date: 2025-12-19
|
||||||
|
date-modified: last-modified
|
||||||
|
bibliography: references.bib
|
||||||
|
# from: markdown+latex_macros
|
||||||
|
---
|
||||||
|
{{< include /_macros.tex >}}
|
||||||
|
|
||||||
|
## TODO List
|
||||||
|
|
||||||
|
- ✅ **C'est fait** Passer version article flat dans Gitlab du papier et nettoyer au minimum sur une branche clean.
|
||||||
|
|
||||||
|
- ✅ Corrigée !⚠️ IL Y A UNE TYPO SUR LE SIGNE DE L'ENTROPIE POUR LE PAPIER: $- \mathcal{H}$ au lieu de $+\mathcal{H}$
|
||||||
|
|
||||||
|
- ✅ Faire tourner clustering sur Trojelsgaard. **Fait mais ne sépare personne**.
|
||||||
|
|
||||||
|
- Petites opérations sur les OTUs (regarder la matrice dans les yeux):
|
||||||
|
- Ranger les OTUs par variances (i.e. `sd(OTU_j)`)
|
||||||
|
- ✅ **Dans un RMD sur Human Microbiome Compendium** Dessiner les graphiques : $\Var[OTU] = f(\Esp[OTU]), \frac{\Var[OTU]}{\Esp[OTU]^2} = f(\Esp[OTU])$ et $\frac{\Var[OTU]}{\Esp[OTU]} = f(\Esp[OTU]) (\approx 1)$ si les données suivent une loi de Poisson.
|
||||||
|
- HMC sur-dispersés (au-dessus bissectrice)
|
||||||
|
- Enterotype phyloseq sous-disp
|
||||||
|
- Regarder la proportion de 1. taxons rares, 2. zéros.
|
||||||
|
- Faire des coupures selon niveaux taxonomiques et regarder si $\Var_{\text{intra}} \approx \Var_{\text{inter}}$
|
||||||
|
- *Bonus*: faire ça dans qmd et voir si forge permet gitlab pages
|
||||||
|
|
||||||
|
- ✅ Faire tourner un LBM sur Human Gut et voir si ça plante sinon, **ça plante, la ram est surchargée.**
|
||||||
|
- ❎⌛ Je tente avec SparseBM de JBL sur Python. **Ne gère pas le Poisson**
|
||||||
|
- Faire LBM sur niveau taxonomique grossier, initialiser avec le résultat pour un niveau plus fin et ainsi de suite.
|
||||||
|
|
||||||
|
- Increasing size :
|
||||||
|

|
||||||
|
|
||||||
|
- ⌛ Prendre jeu de données exemple de phyloseq :
|
||||||
|
- ✅ 😞 enterotype tourne mais ne donne pas de bons résultats (il semble y avoir deux blocs d'échantillons, mais le modèle ne les voit pas).
|
||||||
|
- 🕑 des jeux de données de Mahendra ne tournent pas (phase forward interminable).
|
||||||
|
|
||||||
|
- Relire @peixotoHierarchicalBlockStructures2014
|
||||||
|
- Regarder les gens qui citent les travaux de Peixoto
|
||||||
|
|
||||||
|
- Implémentation `blockmodels` LBM avec covariables sur proportions (voir @eq-modele-covar-prop)
|
||||||
|
|
||||||
|
:::{.callout-note title="Idées"}
|
||||||
|
|
||||||
|
- Travailler sur Fungus Tree network
|
||||||
|
- Comparaison covar prop avec GREMLINS multipartite sur (log(dist_phylo), fungus-tree)
|
||||||
|
- Trouver manière de faire un compromis : $\ell(Y,Z,W;\theta) - \lambda d(C(W),C_0)$ avec $C(W)$ le clustering seulement sur la base de la structure LBM et $C_0$ le clustering de l'arbre. Problème $d$ est une distance entre partition, comment optimiser dessus ?
|
||||||
|
- ⌛ Mise à jour partielle des $\tau$ : ce qui pose soucis c'est les gros calculs matriciels (c'est vraiment vrai?). Donc sorte de "stochastic" VEM où on update seulement une partie des $\tau$ à chaque itération. Et échantillonnage stratifié selon l'arbre ?
|
||||||
|
- ⌛ Simulations avec $n_2$ croissant lancées sur Migale
|
||||||
|
- Réimplémenté VE Bernoulli dans colSBM pour Bipartite et début d'implémentation du Stochastic VE. En fait, le problème vient des calculs matriciels $Y\times(\tau^{(1)})^{\top}$ ($n_2^2$), donc besoin de sous-échantillonner les noeuds de l'autre dimension à mettre à jour.
|
||||||
|
- ✅ **Inutile car besoin du primal** Chercher à formuler le problème dual (s'il existe?) de l'optimisation du LBM. Peut-être possible d'aller plus vite alors ? @eq-dual
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
|
- Clustering unipartite j'ai cassé une fonction de distance à vérifier et réparer
|
||||||
|
|
||||||
|
- Codes pour le papier :
|
||||||
|
- Nettoyer les scripts
|
||||||
|
- Faire un joli README
|
||||||
|
- ❓Faire des notebooks
|
||||||
|
|
||||||
|
- Réussir à reproduire résultat de @abramovStructureKnowsBest
|
||||||
|
|
||||||
|
- Maitriser graphtools de Peixoto pour essayer d'utiliser l'arbre taxonomique sur graphe de cooccurrence inféré par SparCC
|
||||||
|
|
||||||
|
- Maitriser SparCC
|
||||||
|
|
||||||
|
- 👶 (délégué à Mona) Clustering sur Doré :
|
||||||
|
- Regarder pour les couples date+nom les études et le nombre de réseaux analysables (Possible demander à Élisa)
|
||||||
|
- ⌛ Chamberlain et al semble intéressant à regarder ! Voir le Rmarkdown
|
||||||
|
- Clusteriser sur la base des noms et voir parmi les réseaux Européens (désagrégés ?)
|
||||||
|
- Si M > 10, alors voir si je retrouve les mêmes résultats que dans les études.
|
||||||
|
|
||||||
|
- Regarder *Largest gap* sur réseaux Doré
|
||||||
|
|
||||||
|
- ⌛ Essayer *clustering* sur `supinfo`
|
||||||
|
- CAH et Kmeans tendent vers faire $K = 13$ clusters sur les supinfos
|
||||||
|
- Enrichir avec des métriques sur les réseaux (nestedness, connectance autres ?)
|
||||||
|
- Demander à Elisa pour la signification des métadonnées
|
||||||
|
- Demander à Elisa une fois vu cohérences de groupe voir pour interprétation écologiques ?
|
||||||
|
- Algo de clustering sur les groupes trouvés
|
||||||
|
|
||||||
|
### Inférence et microbes
|
||||||
|
|
||||||
|
#### Modèle avec covariables sur probas d'appartenances aux groupes
|
||||||
|
|
||||||
|
Toujours modèle LBM mais avec probas d'appartenance pour les colonnes variables:
|
||||||
|
|
||||||
|
\begin{align*}
|
||||||
|
Z_i &\sim \mathcal{M}(1; \pi_1, \dots, \pi_Q), \sum_{q=1}^{Q} \pi_q = 1\\
|
||||||
|
W_j &\sim \mathcal{M}(1; \rho_1^j, \dots, \rho_R^j), \sum_{r=1}^{R} \rho_r^j = 1\\
|
||||||
|
Y_{i,j}&\mid Z_i = q, W_j = r \sim \mathcal{F}(\alpha_{qr})
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
Inférence variationnelle donc $\ell(Y;\pmb{\theta}) \geq \mathcal{J}(\mathcal{R},\pmb{\theta})$ avec
|
||||||
|
|
||||||
|
$$
|
||||||
|
\ELBORTheta = \sum_{i = 1}^{n_1}\sum_{j=1}^{n_2}\sum_{q \in \mathcal{Q}_1} \sum_{r \in \mathcal{Q}_2} \tau_{iq}^{1} \tau_{jr}^{2} \log f(Y_{ij}; \alpha_{qr})
|
||||||
|
+ \sum_{i=1}^{n_1} \sum_{q \in \mathcal{Q}_1} \tau_{iq}^{1} \log \pi_{\color{black}q} + \sum_{j=1}^{n_2} \sum_{r \in \mathcal{Q}_2} \tau_{jr}^{2} \log \rho_{\color{black}r}^{j} \\
|
||||||
|
- \sum_{i=1}^{n_1} \sum_{q \in \mathcal{Q}_1} \tau_{iq}^{1} \log \tau_{iq}^{1} - \sum_{j=1}^{n_2} \sum_{r \in \mathcal{Q}_2} \tau_{jr}^{2} \log \tau_{jr}^{2}
|
||||||
|
$$
|
||||||
|
|
||||||
|
Plusieurs possibilités pour la définition de $\rho_r^j$
|
||||||
|
|
||||||
|
##### Modèle Sophie
|
||||||
|
|
||||||
|
Avec $\rho_r^j = \frac{\exp{\beta_r X_j}}{\sum_{s=1}^{R} \exp{\beta_s X_j}} = \sigma(\pmb{\beta} \pmb{X})_{r,j}$, où $\sigma$ désigne le softmax. Mais il y a besoin de poser une contrainte sur l'un des $(\beta_r)_{r=1,\dots,R}$, ici $\beta_R = 0$.
|
||||||
|
|
||||||
|
La partie pertinente de l'ELBO devient:
|
||||||
|
$$
|
||||||
|
P((\beta_r)_{r=1,\dots,R}, (X_j)_{j=1,\dots,n_2}, (\tau_{jr})_{\substack{j=1,\dots,n_2\\r=1,\dots,R}} ) = \sum_{j=1}^{n_2} \sum_{r=1}^{R} [\tau_{jr} (\beta_r X_j - \log (\sum_{s=1}^{R} \exp{\beta_s X_j}))]
|
||||||
|
$${#eq-modele-covar-prop}
|
||||||
|
|
||||||
|
Et on obtient la dérivée partielle par rapport à $\beta_t$ comme:
|
||||||
|
\begin{align*}
|
||||||
|
\dfrac{\partial P}{\partial \beta_t}&((\beta_r)_{r=1,\dots,R}, (X_j)_{j=1,\dots,n_2}, (\tau_{jr})_{\substack{j=1,\dots,n_2\\r=1,\dots,R}} ) = \sum_{j=1}^{n_2} \biggl[ \tau_{jt} X_j - \frac{X_j \exp{\beta_t X_j}}{\sum_{s=1}^{R} \exp{\beta_s X_j}} \biggr]\\
|
||||||
|
& = \sum_{j=1}^{n_2} \biggl[\bigl(\tau_{jt} - \sigma(\pmb{\beta} \pmb{X})_{t,j}\bigr) X_j\biggr] = \sum_{j=1}^{n_2} \biggl[\bigl(\tau_{jt} - \rho_t^j \bigr) X_j\biggr]
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
#### Idée du problème dual
|
||||||
|
Les distributions variationnelles sont définies par :
|
||||||
|
|
||||||
|
$$
|
||||||
|
q(Z,W)
|
||||||
|
=
|
||||||
|
\prod_{i=1}^{n_1} q_i(Z_i)
|
||||||
|
\prod_{j=1}^{n_2} q_j(W_j),
|
||||||
|
$$
|
||||||
|
|
||||||
|
avec
|
||||||
|
$$
|
||||||
|
q_i(Z_i=q)=\tau_{iq}^{(1)},
|
||||||
|
\qquad
|
||||||
|
q_j(W_j=r)=\tau_{jr}^{(2)}.
|
||||||
|
$$
|
||||||
|
|
||||||
|
Les contraintes de normalisation sont :
|
||||||
|
$$
|
||||||
|
\sum_{q=1}^Q \tau_{iq}^{(1)} = 1,
|
||||||
|
\qquad
|
||||||
|
\sum_{r=1}^R \tau_{jr}^{(2)} = 1.
|
||||||
|
$$
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
##### Lagrangien
|
||||||
|
|
||||||
|
Le lagrangien du problème variationnel s’écrit :
|
||||||
|
$$
|
||||||
|
\mathcal{L}\!\left(
|
||||||
|
\tau^{(1)},\tau^{(2)},(\lambda_i)_{i=1}^{n_1},(\mu_j)_{j=1}^{n_2}
|
||||||
|
\right)
|
||||||
|
=
|
||||||
|
\ELBORTheta
|
||||||
|
+
|
||||||
|
\sum_{i=1}^{n_1} \lambda_i
|
||||||
|
\left(1-\sum_{q=1}^Q \tau_{iq}^{(1)}\right)
|
||||||
|
+
|
||||||
|
\sum_{j=1}^{n_2} \mu_j
|
||||||
|
\left(1-\sum_{r=1}^R \tau_{jr}^{(2)}\right),
|
||||||
|
$$
|
||||||
|
où $\ELBORTheta$ désigne la borne inférieure variationnelle
|
||||||
|
associée au modèle et aux paramètres $\Theta$.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
##### Problème primal (conditions d’optimalité)
|
||||||
|
|
||||||
|
En dérivant le lagrangien par rapport aux variables variationnelles
|
||||||
|
$\tau^{(1)}$ et $\tau^{(2)}$, puis en égalisant à zéro, on obtient
|
||||||
|
les équations de point fixe suivantes :
|
||||||
|
|
||||||
|
$$
|
||||||
|
\tau_{iq}^{(1)}
|
||||||
|
\propto
|
||||||
|
\pi_q^{(t)}
|
||||||
|
\prod_{j=1}^{n_2}
|
||||||
|
\prod_{r=1}^{R}
|
||||||
|
f\!\left(Y_{ij};\alpha_{qr}^{(t)}\right)^{\tau_{jr}^{(2),(t+1)}},
|
||||||
|
\quad
|
||||||
|
\forall i=1,\dots,n_1,\;
|
||||||
|
q=1,\dots,Q,
|
||||||
|
$$
|
||||||
|
|
||||||
|
$$
|
||||||
|
\tau_{jr}^{(2)}
|
||||||
|
\propto
|
||||||
|
\rho_r^{(t)}
|
||||||
|
\prod_{i=1}^{n_1}
|
||||||
|
\prod_{q=1}^{Q}
|
||||||
|
f\!\left(Y_{ij};\alpha_{qr}^{(t)}\right)^{\tau_{iq}^{(1),(t+1)}},
|
||||||
|
\quad
|
||||||
|
\forall j=1,\dots,n_2,\;
|
||||||
|
r=1,\dots,R,
|
||||||
|
$$
|
||||||
|
où :
|
||||||
|
|
||||||
|
- $\pi_q^{(t)}$ et $\rho_r^{(t)}$ sont les proportions de classes,
|
||||||
|
- $f(\cdot;\alpha_{qr})$ est la loi d'émission du modèle,
|
||||||
|
- $\alpha_{qr}^{(t)}$ désigne les paramètres de bloc à l’itération $t$.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
##### Constantes de normalisation
|
||||||
|
|
||||||
|
Les constantes de normalisation associées sont données par :
|
||||||
|
|
||||||
|
$$
|
||||||
|
T^{(1),(t)}_i
|
||||||
|
=
|
||||||
|
\sum_{q=1}^{Q}
|
||||||
|
\pi_q^{(t)}
|
||||||
|
\exp\!\left(
|
||||||
|
\sum_{j=1}^{n_2}
|
||||||
|
\sum_{r=1}^{R}
|
||||||
|
\tau_{jr}^{(2)}
|
||||||
|
\log f\!\left(Y_{ij};\alpha_{qr}^{(t)}\right)
|
||||||
|
\right),
|
||||||
|
$$
|
||||||
|
|
||||||
|
$$
|
||||||
|
T^{(2),(t)}_j
|
||||||
|
=
|
||||||
|
\sum_{r=1}^{R}
|
||||||
|
\rho_r^{(t)}
|
||||||
|
\exp\!\left(
|
||||||
|
\sum_{i=1}^{n_1}
|
||||||
|
\sum_{q=1}^{Q}
|
||||||
|
\tau_{iq}^{(1)}
|
||||||
|
\log f\!\left(Y_{ij};\alpha_{qr}^{(t)}\right)
|
||||||
|
\right).
|
||||||
|
$$
|
||||||
|
|
||||||
|
Ainsi, les mises à jour normalisées s’écrivent :
|
||||||
|
$$
|
||||||
|
\tau_{iq}^{(1)} = \frac{1}{T^{(1),(t)}_i}(\cdots),
|
||||||
|
\qquad
|
||||||
|
\tau_{jr}^{(2)} = \frac{1}{T^{(2),(t)}_j}(\cdots).
|
||||||
|
$$
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
##### Interprétation duale
|
||||||
|
|
||||||
|
Les multiplicateurs de Lagrange s’identifient alors à :
|
||||||
|
$$
|
||||||
|
\lambda_i = \log T^{(1),(t)}_i - 1,
|
||||||
|
\qquad
|
||||||
|
\mu_j = \log T^{(2),(t)}_j - 1,
|
||||||
|
$$ {#eq-dual}
|
||||||
|
et le problème dual consiste à minimiser une somme de fonctions de
|
||||||
|
log-partition, ce qui montre que l’algorithme VEM réalise implicitement
|
||||||
|
une descente sur le dual.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#### Bibliographie: à lire, à faire
|
||||||
|
|
||||||
|
- Lire article multi-niveaux Saint-Clair
|
||||||
|
- 🆕 🔎 Trouver des papiers:
|
||||||
|
- LBM Negative Binomial
|
||||||
|
- Network inference through sample comparison
|
||||||
|
|
||||||
|
- Idée des groupes sur la base de distance phylogénétique:
|
||||||
|
- En train de comprendre les distances que phyloseq permet de calculer sur notre exemple
|
||||||
|
- En train de lire sur Principal coordinate analysis : https://openplantpathology.github.io/OPP_Workshop_Multivariate/2-MV_PCO.html
|
||||||
|
- Parametric t-SNE pour avoir une unique représentation latente (inconvénient utilise du Deep Learning)
|
||||||
|
- Lire Papier UniFrac
|
||||||
|
|
||||||
|
#### Réflexion
|
||||||
|
|
||||||
|
- easy16s : se renseigner sur
|
||||||
|
- $\alpha$, $\beta$ diversité
|
||||||
|
- Heatmap
|
||||||
|
- Regarder **SPARTA** Rennes
|
||||||
|
- Ecrire et étudier les modèles pour différents niveaux taxonomiques.
|
||||||
|
- 🆕 Regarder NetComi
|
||||||
|
- 🆕 Regarder OneNet car agrégation plus robuste
|
||||||
|
- 🆕 Réfléchir au sens d'agréger les données ou de les diviser
|
||||||
|
|
||||||
|
#### Écrire et faire tourner
|
||||||
|
|
||||||
|
- Lancer *colBiSBM* sur $OTU\times Sample$ → problème du chargement en mémoire des données à voir
|
||||||
|
- Lancer *colSBM* sur $OTU\times OTU$
|
||||||
|
- TabNet pratiquer les [exercices](https://github.com/cregouby/Tutoriel_torch)
|
||||||
|
- 🆕 SparCC à différents niveaux
|
||||||
|
- 🆕 SBM à différents niveaux
|
||||||
|
- 🆕⌛ Tree-PLN à différents niveaux
|
||||||
|
|
||||||
|
|
||||||
|
#### Causalité
|
||||||
|
|
||||||
|
Plus sur le temps long, à regarder
|
||||||
|
|
||||||
|
- GT causalité
|
||||||
|
- Daria Bystrova lire présentation @bystrovaCausalDiscovery (Meek rules, V-structure)
|
||||||
|
|
||||||
|
## A discuter
|
||||||
|
|
||||||
|
- 🆕 Voir pour des Réseaux / GDR où aller
|
||||||
|
- 🆕 Chercher des cours à suivre
|
||||||
|
|
||||||
|
## Biblio à faire
|
||||||
|
|
||||||
|
- Regarder Transport optimal graphes bipartite.
|
||||||
|
|
||||||
|
|
||||||
|
## Lectures en cours 📚
|
||||||
|
|
||||||
|
### HDR Vincent Brault
|
||||||
|
|
||||||
|
- ⌛ Chap 2 : Creuser l'idée de maximiser l'énergie libre, très intéressant regarder le critère CARI et lire Robert et al 2021. Actuellement p32 du manuscrit
|
||||||
|
- Chap 3
|
||||||
|
|
||||||
|
### OT
|
||||||
|
- ⌛ @mazeletUnsupervisedLearningOptimal Intéressant pour le transport optimal entre graphes de tailles différentes | Regarder si la régularisation entropique ne marche pas bien pour le graphe.
|
||||||
|
- ⌛ @nennaLecture2Entropic Pour comprendre le problème d'OT régularisé par l'entropie.
|
||||||
|
- ⌛ @nennaLecture1Monge
|
||||||
|
|
||||||
|
### Inférence de graphes
|
||||||
|
|
||||||
|
- ⌛ @aitchisonStatisticalAnalysisCompositional1982a, en cours
|
||||||
|
|
||||||
|
- ❗📖 @payneFiniteMixturesMultivariate2023 sur MixMPLN
|
||||||
|
|
||||||
|
### Causalité
|
||||||
|
|
||||||
|
- ❗📖 @bystrovaCausalDiscovery
|
||||||
|
|
||||||
|
### Largest Gaps
|
||||||
|
|
||||||
|
- ❗📖 @braultFastConsistentAlgorithm2023
|
||||||
|
- ❗📖 @channarondClassificationEstimationStochastic2012 le papier qui introduit le *Largest Gaps*
|
||||||
BIN
suivi/2025-51/figs/tendance_temps.png
Normal file
|
After Width: | Height: | Size: 212 KiB |
176
suivi/2025-51/references.bib
Normal file
|
|
@ -0,0 +1,176 @@
|
||||||
|
@article{mazeletUnsupervisedLearningOptimal,
|
||||||
|
title = {Unsupervised {{Learning}} for {{Optimal Transport}} Plan Prediction between Unbalanced Graphs},
|
||||||
|
author = {Mazelet, Sonia and Flamary, Rémi and Thirion, Bertrand},
|
||||||
|
abstract = {Optimal transport between graphs, based on Gromov-Wasserstein and other extensions, is a powerful tool for comparing and aligning graph structures. However, solving the associated non-convex optimization problems is computationally expensive, which limits the scalability of these methods to large graphs. In this work, we present Unbalanced Learning of Optimal Transport (ULOT), a deep learning method that predicts optimal transport plans between two graphs. Our method is trained by minimizing the fused unbalanced Gromov-Wasserstein (FUGW) loss. We propose a novel neural architecture with cross-attention that is conditioned on the FUGW tradeoff hyperparameters. We evaluate ULOT on synthetic stochastic block model (SBM) graphs and on real cortical surface data obtained from fMRI. ULOT predicts transport plans with competitive loss up to two orders of magnitude faster than classical solvers. Furthermore, the predicted plan can be used as a warm start for classical solvers to accelerate their convergence. Finally, the predicted transport plan is fully differentiable with respect to the graph inputs and FUGW hyperparameters, enabling the optimization of functionals of the ULOT plan.},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-11T09:08:09.864Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/HPZEYMM9/Mazelet et al. - Unsupervised Learning for Optimal Transport plan prediction between unbalanced graphs.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{peixotoHierarchicalBlockStructures2014,
|
||||||
|
title = {Hierarchical {{Block Structures}} and {{High-Resolution Model Selection}} in {{Large Networks}}},
|
||||||
|
author = {Peixoto, Tiago P.},
|
||||||
|
year = 2014,
|
||||||
|
month = mar,
|
||||||
|
journal = {Physical Review X},
|
||||||
|
volume = {4},
|
||||||
|
number = {1},
|
||||||
|
pages = {011047},
|
||||||
|
issn = {2160-3308},
|
||||||
|
doi = {10.1103/PhysRevX.4.011047},
|
||||||
|
urldate = {2025-09-26},
|
||||||
|
copyright = {http://creativecommons.org/licenses/by/3.0/},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-09-26T08:27:38.586Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/R58ZQK8J/Peixoto - 2014 - Hierarchical Block Structures and High-Resolution Model Selection in Large Networks.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@article{abramovStructureKnowsBest,
|
||||||
|
title = {Structure Knows Best: Predicting Ecological Interactions across Space through Pairwise Integration of Latent Network Patterns},
|
||||||
|
author = {Abramov, Kesem and Biton, Barry and Galai, Geut and Puzis, Rami and Pilosof, Shai},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-12-01T08:50:29.812Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/DX44VXQA/Abramov et al. - Structure knows best predicting ecological interactions across space through pairwise integration o.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@article{nennaLecture2Entropic,
|
||||||
|
title = {Lecture 2: {{Entropic Optimal Transport}}},
|
||||||
|
author = {Nenna, Luca},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-11T16:06:28.547Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/WGFIISDB/Nenna - Lecture 2 Entropic Optimal Transport.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{nennaLecture1Monge,
|
||||||
|
title = {Lecture 1 {{Monge}} and {{Kantorovich}} Problems: From Primal to Dual},
|
||||||
|
author = {Nenna, Luca},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-13T09:24:13.832Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/7LVQPD6D/Nenna - Lecture 1 Monge and Kantorovich problems from primal to dual.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{Morton2021.11.09.467939,
|
||||||
|
title = {Scalable Estimation of Microbial Co-Occurrence Networks with {{Variational Autoencoders}}},
|
||||||
|
author = {Morton, James T. and Silverman, Justin and Tikhonov, Gleb and Lähdesmäki, Harri and Bonneau, Rich},
|
||||||
|
date = {2021},
|
||||||
|
journaltitle = {bioRxiv : the preprint server for biology},
|
||||||
|
shortjournal = {bioRxiv},
|
||||||
|
eprint = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939.full.pdf},
|
||||||
|
publisher = {Cold Spring Harbor Laboratory},
|
||||||
|
doi = {10.1101/2021.11.09.467939},
|
||||||
|
url = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939},
|
||||||
|
abstract = {Estimating microbe-microbe interactions is critical for understanding the ecological laws governing microbial communities. Rapidly decreasing sequencing costs have promised new opportunities to estimate microbe-microbe interactions across thousands of uncultured, unknown microbes. However, typical microbiome datasets are very high dimensional and accurate estimation of microbial correlations requires tens of thousands of samples, exceeding the computational capabilities of existing methodologies. Furthermore, the vast majority of microbiome studies collect compositional metagenomics data which enforces a negative bias when computing microbe-microbe correlations. The Multinomial Logistic Normal (MLN) distribution has been shown to be effective at inferring microbe-microbe correlations, however scalable Bayesian inference of these distributions has remained elusive. Here, we show that carefully constructed Variational Autoencoders (VAEs) augmented with the Isometric Log-ratio (ILR) transform can estimate low-rank MLN distributions thousands of times faster than existing methods. These VAEs can be trained on tens of thousands of samples, enabling co-occurrence inference across tens of thousands of microbes without regularization. The latent embedding distances computed from these VAEs are competitive with existing beta-diversity methods across a variety of mouse and human microbiome classification and regression tasks, with notable improvements on longitudinal studies. Competing Interest Statement: The authors have declared no competing interest.},
|
||||||
|
elocation-id = {2021.11.09.467939},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-30T14:17:29.518Z}
|
||||||
|
}
|
||||||
|
@article{aitchisonStatisticalAnalysisCompositional1982a,
|
||||||
|
title = {The {{Statistical Analysis}} of {{Compositional Data}}},
|
||||||
|
author = {Aitchison, J.},
|
||||||
|
date = {1982},
|
||||||
|
journaltitle = {Journal of the Royal Statistical Society. Series B (Methodological)},
|
||||||
|
volume = {44},
|
||||||
|
number = {2},
|
||||||
|
eprint = {2345821},
|
||||||
|
eprinttype = {jstor},
|
||||||
|
pages = {139--177},
|
||||||
|
publisher = {[Royal Statistical Society, Oxford University Press]},
|
||||||
|
issn = {0035-9246},
|
||||||
|
url = {https://www.jstor.org/stable/2345821},
|
||||||
|
urldate = {2025-05-07},
|
||||||
|
abstract = {The simplex plays an important role as sample space in many practical situations where compositional data, in the form of proportions of some whole, require interpretation. It is argued that the statistical analysis of such data has proved difficult because of a lack both of concepts of independence and of rich enough parametric classes of distributions in the simplex. A variety of independence hypotheses are introduced and interrelated, and new classes of transformed-normal distributions in the simplex are provided as models within which the independence hypotheses can be tested through standard theory of parametric hypothesis testing. The new concepts and statistical methodology are illustrated by a number of applications.},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-05-07T07:43:38.485Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/S97URH4Y/Aitchison - 1982 - The Statistical Analysis of Compositional Data.pdf}
|
||||||
|
}
|
||||||
|
@online{payneFiniteMixturesMultivariate2023,
|
||||||
|
title = {Finite {{Mixtures}} of {{Multivariate Poisson-Log Normal Factor Analyzers}} for {{Clustering Count Data}}},
|
||||||
|
author = {Payne, Andrea and Silva, Anjali and Rothstein, Steven J. and McNicholas, Paul D. and Subedi, Sanjeena},
|
||||||
|
date = {2023-11-13},
|
||||||
|
eprint = {2311.07762},
|
||||||
|
eprinttype = {arXiv},
|
||||||
|
eprintclass = {stat},
|
||||||
|
doi = {10.48550/arXiv.2311.07762},
|
||||||
|
url = {http://arxiv.org/abs/2311.07762},
|
||||||
|
urldate = {2025-07-02},
|
||||||
|
abstract = {A mixture of multivariate Poisson-log normal factor analyzers is introduced by imposing constraints on the covariance matrix, which resulted in flexible models for clustering purposes. In particular, a class of eight parsimonious mixture models based on the mixtures of factor analyzers model are introduced. Variational Gaussian approximation is used for parameter estimation, and information criteria are used for model selection. The proposed models are explored in the context of clustering discrete data arising from RNA sequencing studies. Using real and simulated data, the models are shown to give favourable clustering performance. The GitHub R package for this work is available at https://github.com/anjalisilva/mixMPLNFA and is released under the open-source MIT license.},
|
||||||
|
pubstate = {prepublished},
|
||||||
|
keywords = {/unread,Statistics - Computation,Statistics - Machine Learning,Statistics - Methodology},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-02T09:31:47.579Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/BXVPEIDD/Payne et al. - 2023 - Finite Mixtures of Multivariate Poisson-Log Normal Factor Analyzers for Clustering Count Data.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/L5DAS5C2/2311.html}
|
||||||
|
}
|
||||||
|
@unpublished{bystrovaCausalDiscovery,
|
||||||
|
title = {Causal Discovery},
|
||||||
|
author = {Bystrova, Daria},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-02T09:34:39.476Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/NQE5DY92/Bystrova - Causal discovery.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@online{braultFastConsistentAlgorithm2023,
|
||||||
|
title = {Fast and {{Consistent Algorithm}} for the {{Latent Block Model}}},
|
||||||
|
author = {Brault, Vincent and Channarond, Antoine},
|
||||||
|
date = {2023-03-09},
|
||||||
|
eprint = {1610.09005},
|
||||||
|
eprinttype = {arXiv},
|
||||||
|
eprintclass = {math},
|
||||||
|
doi = {10.48550/arXiv.1610.09005},
|
||||||
|
url = {http://arxiv.org/abs/1610.09005},
|
||||||
|
urldate = {2025-07-09},
|
||||||
|
abstract = {The latent block model is used to simultaneously rank the rows and columns of a matrix to reveal a block structure. The algorithms used for estimation are often time consuming. However, recent work shows that the log-likelihood ratios are equivalent under the complete and observed (with unknown labels) models and the groups posterior distribution to converge as the size of the data increases to a Dirac mass located at the actual groups configuration. Based on these observations, the algorithm Largest Gaps is proposed in this paper to perform clustering using only the marginals of the matrix, when the number of blocks is very small with respect to the size of the whole matrix in the case of binary data. In addition, a model selection method is incorporated with a proof of its consistency. Thus, this paper shows that studying simplistic configurations (few blocks compared to the size of the matrix or very contrasting blocks) with complex algorithms is useless since the marginals already give very good parameter and classification estimates.},
|
||||||
|
langid = {english},
|
||||||
|
pubstate = {prepublished},
|
||||||
|
keywords = {/unread,Mathematics - Statistics Theory,Statistics - Computation,Statistics - Statistics Theory},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T13:58:53.533Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/5LXC6Y68/Brault et Channarond - 2023 - Fast and Consistent Algorithm for the Latent Block Model.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{braultGeneralisationLalgorithmeLargest,
|
||||||
|
title = {Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique},
|
||||||
|
author = {Brault, Vincent and Channarond, Antoine and Robert, Valérie},
|
||||||
|
abstract = {The latent block model assumes there exists a distribution for each crossing between an object cluster and a variable cluster of a data table ; the cells are supposed to be independent conditionally to the choice of these clusters. To estimate the model parameters, most of algorithms are time consuming. Brault and Channarond (2016) proposed to adapt the Largest Gaps algorithm which consists in using the margins. They thus obtained a procedure which estimates all the model parameters consistently but requires a large number of observations. In this talk, we will extend the procedure to the case of any distribution having a second order moment by using an EM algorithm estimation.},
|
||||||
|
langid = {french},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T12:29:43.098Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/GIUNC4L3/Brault et al. - Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{channarondClassificationEstimationStochastic2012,
|
||||||
|
title = {Classification and Estimation in the {{Stochastic Blockmodel}} Based on the Empirical Degrees},
|
||||||
|
author = {Channarond, Antoine and Daudin, Jean-Jacques and Robin, Stéphane},
|
||||||
|
date = {2012-01-01},
|
||||||
|
journaltitle = {Electronic Journal of Statistics},
|
||||||
|
shortjournal = {Electron. J. Statist.},
|
||||||
|
volume = {6},
|
||||||
|
publisher = {Institute of Mathematical Statistics},
|
||||||
|
issn = {1935-7524},
|
||||||
|
doi = {10.1214/12-ejs753},
|
||||||
|
url = {https://projecteuclid.org/journals/electronic-journal-of-statistics/volume-6/issue-none/Classification-and-estimation-in-the-Stochastic-Blockmodel-based-on-the/10.1214/12-EJS753.full},
|
||||||
|
urldate = {2025-07-09},
|
||||||
|
abstract = {The Stochastic Blockmodel [16] is a mixture model for heterogeneous network data. Unlike the usual statistical framework, new nodes give additional information about the previous ones in this model. Thereby the distribution of the degrees concentrates in points conditionally on the node class. We show under a mild assumption that classification, estimation and model selection can actually be achieved with no more than the empirical degree data. We provide an algorithm able to process very large networks and consistent estimators based on it. In particular, we prove a bound of the probability of misclassification of at least one node, including when the number of classes grows.},
|
||||||
|
issue = {none},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T13:59:33.921Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/8TL8AJ2G/Channarond et al. - 2012 - Classification and estimation in the Stochastic Blockmodel based on the empirical degrees.pdf}
|
||||||
|
}
|
||||||
165
suivi/2026-12/2026-12.qmd
Normal file
|
|
@ -0,0 +1,165 @@
|
||||||
|
---
|
||||||
|
title: "Bilan semaine 12 2026 : 16 mars - 20 mars"
|
||||||
|
categories: [colBiSBM, inférence, GNN]
|
||||||
|
date: 2026 03 16
|
||||||
|
date-modified: last-modified
|
||||||
|
bibliography: references.bib
|
||||||
|
# from: markdown+latex_macros
|
||||||
|
---
|
||||||
|
{{< include /_macros.tex >}}
|
||||||
|
|
||||||
|
## TODO List
|
||||||
|
|
||||||
|
### Mes priorités de la semaine
|
||||||
|
|
||||||
|
- Faire tourner clustering colBiSBM sur les clusters dégagés par Mona et l'accompagner sur la rédaction de son poster
|
||||||
|
- Préparer ma présentation (voir le bloc ci-après) pour Rochebrune et donner un titre :
|
||||||
|
- "Comparing networks, a challenging task?" (NUL/20)
|
||||||
|
- "High and low: comparing networks, ~~a burden for the mind~~ what works and what doesn't(?)"
|
||||||
|
- Finir implémentation dans sbm de blockmodels avec covariables sur les noeuds
|
||||||
|
- Bricoler une pipeline sbm |> nnet::multinom comme performance de référence pour l'introduction de covariables
|
||||||
|
- Comprendre pourquoi l'idée géniale de Sophie est remise en question
|
||||||
|
- Corriger les copies des 1As
|
||||||
|
|
||||||
|
:::{.callout-note title="Idées présentation Rochebrune"}
|
||||||
|
|
||||||
|
- colBiSBM: ce qui marche (et à la fin la galère du clustering)
|
||||||
|
- Transition sur les OTUs et motivations de pourquoi c'est galère (#OTU>>#Sample, dépendance par la phylogénie ...)
|
||||||
|
- Motivation du co-clustering (LBM), trouver des groupes d'échantillons et d'OTUs qui exhibent des comportements différents (pathologies, sols particuliers, échantillon alimentaire avec une flore d'intérêt ...)
|
||||||
|
- Première idée: LBM séquentiel, faire repartir des $\tau^{l}$ pour initialiser les $\tau^{l+1}$ selon l'arbre phylogénétique
|
||||||
|
- Idées: faciliter l'exploration du paysage de l'ELBO en se plaçant dans une région de paramètres qui a du sens phylo et donc en sortir indiquerait un signal dans les données.
|
||||||
|
- Problèmes: ne résout pas le problème de la dimensionnalité en les OTUs
|
||||||
|
- Comme implém., performances pas incroyables et signal peu clair.
|
||||||
|
- Deuxième idée: SBM et LBM avec covariables sur les noeuds. Reconstruire des positions dans un espace phylogénétique à partir des matrices de distances phylogénétiques (et donc en accord avec l'arbre). Mais aussi modèle plus large pour prendre en compte diverses situations (trouver des exemples d'autres données?).
|
||||||
|
- Idées: possible de former les groupes a priori selon les tendances dans les covariables (reflet de la phylogénie) et mettre à jour selon les données
|
||||||
|
- Quasiment implémenté dans `{blockmodels}` et dans `{sbm}` (j'aimerais pouvoir dire le jour de ma présentation que c'est dispo sur la version de développement, il va falloir charbonner de mon côté).
|
||||||
|
- Théoriquement: on a l'identifiabilité (ou pas vu la pratique?)
|
||||||
|
- Problèmes: Ne résout pas les problèmes de calculs, en pratique on ne retrouve pas les bons coefficients (label-switching?)
|
||||||
|
- Troisième idée: Utiliser la structure de l'arbre phylogénétique pour encoder une relation des positions latentes dans un *Latent Position Model* (LPM).
|
||||||
|
- Détail: pour chaque couche $l$, chaque individu $i$ de la couche et son ancêtre $j=Ancestor(i)$ (CITER LES PAPIERS A L'ORIGINE DE L'IDEE), on écrit $\gamma_{l,i} = \gamma_{l-1,j=Ancestor(i)} + \delta_{l,i} = \gamma_{0} + \sum_{k\in Ancestry(i)} \delta_{l,k}$ (puisqu'on a un unique ancêtre dans chaque couche $l$), les noeuds qui partagent un ancêtre commun
|
||||||
|
partagent la position latente à ce niveau et lui ajoutent un décalage $\delta$.
|
||||||
|
- Remarque: Peut-être possible de gérer les cas de transferts horizontaux en élargissant le concept de lignée ancestrale?
|
||||||
|
- Idée: (casquette de biologiste) possible d'avoir dans une même lignée phylogénétique des individus qui se spécialisent voire qui développent une convergence évolutive et acquièrent des traits phénotypiques qui ressemblent à d'autres familles.
|
||||||
|
Dans ce cas, le $\delta_{l,i}$ rapproche le $\gamma_{l,i}$ d'un $\gamma_{l, i^{\prime}}$ qui a le trait commun.
|
||||||
|
- Problème: les calculs seraient-ils simplifiés? pas sûr. Et je n'y ai pas encore touché.
|
||||||
|
- Utiliser les Hierarchical SBM et LBM de Peixoto dans son package `graph-tool` pour initialiser l'arbre liant les couches avec l'arbre phylogénétique.
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
|
### Les autres tâches
|
||||||
|
|
||||||
|
- Petites opérations sur les OTUs (regarder la matrice dans les yeux):
|
||||||
|
- Ranger les OTUs par variances (i.e. `sd(OTU_j)`)
|
||||||
|
- HMC sur-dispersés (au-dessus bissectrice)
|
||||||
|
- Enterotype phyloseq sous-disp
|
||||||
|
- Regarder la proportion de 1. taxons rares, 2. zéros.
|
||||||
|
- Faire des coupures selon niveaux taxonomiques et regarder si $\Var_{\text{intra}} \approx \Var_{\text{inter}}$
|
||||||
|
- *Bonus*: faire ça dans qmd et voir si forge permet gitlab pages
|
||||||
|
|
||||||
|
- Relire @peixotoHierarchicalBlockStructures2014
|
||||||
|
- Regarder les gens qui citent les travaux de Peixoto
|
||||||
|
- Utiliser graph-tool en initialisant la recherche Nested avec le partitionnement donné par l'arbre phylogénétique.
|
||||||
|
|
||||||
|
:::{.callout-note title="Idées"}
|
||||||
|
|
||||||
|
- Trouver manière de faire un compromis : $\ell(Y,Z,W;\theta) - \lambda d(C(W),C_0)$ avec $C(W)$ le clustering seulement sur la base de la structure LBM et $C_0$ le clustering de l'arbre. Problème $d$ est une distance entre partition, comment optimiser dessus ?
|
||||||
|
- ⌛ Mise à jour partielle des $\tau$ : ce qui pose soucis c'est les gros calculs matriciels (c'est vraiment vrai?). Donc sorte de "stochastic" VEM où on update seulement une partie des $\tau$ à chaque itération. Et échantillonnage stratifié selon l'arbre ?
|
||||||
|
- ⌛ Simulations avec $n_2$ croissant lancées sur Migale
|
||||||
|
- Réimplémenté VE Bernoulli dans colSBM pour Bipartite et début implémentation Stochastic VE. En fait le problème vient des calculs matriciels $Y\times(\tau^{(1)})^{\top}$ ($n_2^2$) donc besoin de sous-échantillonner les noeuds de l'autre dimension à mettre à jour.
|
||||||
|
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
|
- Clustering unipartite j'ai cassé une fonction de distance à vérifier et réparer
|
||||||
|
|
||||||
|
- Codes pour le papier :
|
||||||
|
- Nettoyer les scripts
|
||||||
|
- Faire un joli README
|
||||||
|
- ❓Faire des notebooks
|
||||||
|
|
||||||
|
- Réussir à reproduire résultat de @abramovStructureKnowsBest
|
||||||
|
|
||||||
|
- Maitriser graph-tool de Peixoto pour essayer d'utiliser l'arbre taxonomique sur graphe de cooccurrence inféré par SparCC
|
||||||
|
|
||||||
|
- Maitriser SparCC
|
||||||
|
|
||||||
|
- 👶 (délégué à Mona) Clustering sur Doré :
|
||||||
|
- Ajouter Chao1 et 2, colonne par colonne (site par site), et faire indice moyen et la variance.
|
||||||
|
|
||||||
|
### Inférence et microbes
|
||||||
|
|
||||||
|
- ⌛ (En cours) Possible en modifiant lbm.h et sbm.h d'obtenir un modèle utilisant les covariables de groupes (de blocs ?).
|
||||||
|
Car besoin de changer `membership.m_step()` pour mettre à jour $\pmb\pi$ et $\pmb{\rho}$ en utilisant les $\pmb B^{\top}\pmb X$
|
||||||
|
et en renvoyant l'ELBO adaptée.
|
||||||
|
- 😄 Avantage s'inscrit directement dans blockmodels et permet d'avoir toutes les lois d'émissions déjà codées et compatibles !
|
||||||
|
|
||||||
|
- 😢 Besoin de réfléchir à une bonne implémentation.
|
||||||
|
|
||||||
|
J'ai codé l'optimisation et les transferts mais il faut que je vérifie que tout fonctionne
|
||||||
|
|
||||||
|
- ✅ Appliqué multipartite sur $\forall i, OTU_i \times Sample$:
|
||||||
|

|
||||||
|
|
||||||
|
#### Bibliographie: à lire, à faire
|
||||||
|
|
||||||
|
- Lire article multi-niveaux Saint-Clair
|
||||||
|
- 🆕 🔎 Trouver des papiers:
|
||||||
|
- LBM Negative Binomial
|
||||||
|
- Network inference through sample comparison
|
||||||
|
|
||||||
|
- Idée des groupes sur la base de distance phylogénétique:
|
||||||
|
- En train de comprendre les distances que phyloseq permet de calculer sur notre exemple
|
||||||
|
- En train de lire sur Principal coordinate analysis : https://openplantpathology.github.io/OPP_Workshop_Multivariate/2-MV_PCO.html
|
||||||
|
- Parametric t-SNE pour avoir une unique représentation latente (inconvénient utilise du Deep Learning)
|
||||||
|
- Lire Papier UniFrac
|
||||||
|
|
||||||
|
|
||||||
|
#### Écrire et faire tourner
|
||||||
|
|
||||||
|
- 🆕 SparCC à différents niveaux
|
||||||
|
- 🆕⌛ Tree-PLN à différents niveaux
|
||||||
|
|
||||||
|
|
||||||
|
#### Causalité
|
||||||
|
|
||||||
|
Plus sur le temps long, à regarder
|
||||||
|
|
||||||
|
- GT causalité
|
||||||
|
- Daria Bystrova lire présentation @bystrovaCausalDiscovery (Meek rules, V-structure)
|
||||||
|
|
||||||
|
## A discuter
|
||||||
|
|
||||||
|
- 🆕 Voir pour des Réseaux / GDR où aller
|
||||||
|
- 🆕 Chercher des cours à suivre
|
||||||
|
|
||||||
|
## Biblio à faire
|
||||||
|
|
||||||
|
- Regarder Transport optimal graphes bipartite.
|
||||||
|
|
||||||
|
|
||||||
|
## Lectures en cours 📚
|
||||||
|
|
||||||
|
### HDR Vincent Brault
|
||||||
|
|
||||||
|
- ⌛ Chap 2 : Creuser l'idée de maximiser l'énergie libre, très intéressant regarder le critère CARI et lire Robert et al 2021. Actuellement p32 du manuscrit
|
||||||
|
- Chap 3
|
||||||
|
|
||||||
|
### OT
|
||||||
|
- ⌛ @mazeletUnsupervisedLearningOptimal Intéressant pour le transport optimal entre graphes de tailles différentes | Regarder si la régularisation entropique ne marche pas bien pour le graphe.
|
||||||
|
- ⌛ @nennaLecture2Entropic Pour comprendre le problème d'OT régularisé par l'entropie.
|
||||||
|
- ⌛ @nennaLecture1Monge
|
||||||
|
|
||||||
|
### Inférence de graphes
|
||||||
|
|
||||||
|
- ⌛ @aitchisonStatisticalAnalysisCompositional1982a, en cours
|
||||||
|
|
||||||
|
- ❗📖 @payneFiniteMixturesMultivariate2023 sur MixMPLN
|
||||||
|
|
||||||
|
### Causalité
|
||||||
|
|
||||||
|
- ❗📖 @bystrovaCausalDiscovery
|
||||||
|
|
||||||
|
### Largest Gaps
|
||||||
|
|
||||||
|
- ❗📖 @braultFastConsistentAlgorithm2023
|
||||||
|
- ❗📖 @channarondClassificationEstimationStochastic2012 le papier qui introduit le *Largest Gaps*
|
||||||
176
suivi/2026-12/references.bib
Normal file
|
|
@ -0,0 +1,176 @@
|
||||||
|
@article{mazeletUnsupervisedLearningOptimal,
|
||||||
|
title = {Unsupervised {{Learning}} for {{Optimal Transport}} Plan Prediction between Unbalanced Graphs},
|
||||||
|
author = {Mazelet, Sonia and Flamary, Rémi and Thirion, Bertrand},
|
||||||
|
abstract = {Optimal transport between graphs, based on Gromov-Wasserstein and other extensions, is a powerful tool for comparing and aligning graph structures. However, solving the associated non-convex optimization problems is computationally expensive, which limits the scalability of these methods to large graphs. In this work, we present Unbalanced Learning of Optimal Transport (ULOT), a deep learning method that predicts optimal transport plans between two graphs. Our method is trained by minimizing the fused unbalanced Gromov-Wasserstein (FUGW) loss. We propose a novel neural architecture with cross-attention that is conditioned on the FUGW tradeoff hyperparameters. We evaluate ULOT on synthetic stochastic block model (SBM) graphs and on real cortical surface data obtained from fMRI. ULOT predicts transport plans with competitive loss up to two orders of magnitude faster than classical solvers. Furthermore, the predicted plan can be used as a warm start for classical solvers to accelerate their convergence. Finally, the predicted transport plan is fully differentiable with respect to the graph inputs and FUGW hyperparameters, enabling the optimization of functionals of the ULOT plan.},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-11T09:08:09.864Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/HPZEYMM9/Mazelet et al. - Unsupervised Learning for Optimal Transport plan prediction between unbalanced graphs.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{peixotoHierarchicalBlockStructures2014,
|
||||||
|
title = {Hierarchical {{Block Structures}} and {{High-Resolution Model Selection}} in {{Large Networks}}},
|
||||||
|
author = {Peixoto, Tiago P.},
|
||||||
|
year = 2014,
|
||||||
|
month = mar,
|
||||||
|
journal = {Physical Review X},
|
||||||
|
volume = {4},
|
||||||
|
number = {1},
|
||||||
|
pages = {011047},
|
||||||
|
issn = {2160-3308},
|
||||||
|
doi = {10.1103/PhysRevX.4.011047},
|
||||||
|
urldate = {2025-09-26},
|
||||||
|
copyright = {http://creativecommons.org/licenses/by/3.0/},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-09-26T08:27:38.586Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/R58ZQK8J/Peixoto - 2014 - Hierarchical Block Structures and High-Resolution Model Selection in Large Networks.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@article{abramovStructureKnowsBest,
|
||||||
|
title = {Structure Knows Best: Predicting Ecological Interactions across Space through Pairwise Integration of Latent Network Patterns},
|
||||||
|
author = {Abramov, Kesem and Biton, Barry and Galai, Geut and Puzis, Rami and Pilosof, Shai},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-12-01T08:50:29.812Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/DX44VXQA/Abramov et al. - Structure knows best predicting ecological interactions across space through pairwise integration o.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@article{nennaLecture2Entropic,
|
||||||
|
title = {Lecture 2: {{Entropic Optimal Transport}}},
|
||||||
|
author = {Nenna, Luca},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-11T16:06:28.547Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/WGFIISDB/Nenna - Lecture 2 Entropic Optimal Transport.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{nennaLecture1Monge,
|
||||||
|
title = {Lecture 1 {{Monge}} and {{Kantorovich}} Problems: From Primal to Dual},
|
||||||
|
author = {Nenna, Luca},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-13T09:24:13.832Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/7LVQPD6D/Nenna - Lecture 1 Monge and Kantorovich problems from primal to dual.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{Morton2021.11.09.467939,
|
||||||
|
title = {Scalable Estimation of Microbial Co-Occurrence Networks with {{Variational Autoencoders}}},
|
||||||
|
author = {Morton, James T. and Silverman, Justin and Tikhonov, Gleb and Lähdesmäki, Harri and Bonneau, Rich},
|
||||||
|
date = {2021},
|
||||||
|
journaltitle = {bioRxiv : the preprint server for biology},
|
||||||
|
shortjournal = {bioRxiv},
|
||||||
|
eprint = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939.full.pdf},
|
||||||
|
publisher = {Cold Spring Harbor Laboratory},
|
||||||
|
doi = {10.1101/2021.11.09.467939},
|
||||||
|
url = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939},
|
||||||
|
abstract = {Estimating microbe-microbe interactions is critical for understanding the ecological laws governing microbial communities. Rapidly decreasing sequencing costs have promised new opportunities to estimate microbe-microbe interactions across thousands of uncultured, unknown microbes. However, typical microbiome datasets are very high dimensional and accurate estimation of microbial correlations requires tens of thousands of samples, exceeding the computational capabilities of existing methodologies. Furthermore, the vast majority of microbiome studies collect compositional metagenomics data which enforces a negative bias when computing microbe-microbe correlations. The Multinomial Logistic Normal (MLN) distribution has been shown to be effective at inferring microbe-microbe correlations, however scalable Bayesian inference of these distributions has remained elusive. Here, we show that carefully constructed Variational Autoencoders (VAEs) augmented with the Isometric Log-ratio (ILR) transform can estimate low-rank MLN distributions thousands of times faster than existing methods. These VAEs can be trained on tens of thousands of samples, enabling co-occurrence inference across tens of thousands of microbes without regularization. The latent embedding distances computed from these VAEs are competitive with existing beta-diversity methods across a variety of mouse and human microbiome classification and regression tasks, with notable improvements on longitudinal studies.Competing Interest StatementThe authors have declared no competing interest.},
|
||||||
|
elocation-id = {2021.11.09.467939},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-30T14:17:29.518Z}
|
||||||
|
}
|
||||||
|
@article{aitchisonStatisticalAnalysisCompositional1982a,
|
||||||
|
title = {The {{Statistical Analysis}} of {{Compositional Data}}},
|
||||||
|
author = {Aitchison, J.},
|
||||||
|
date = {1982},
|
||||||
|
journaltitle = {Journal of the Royal Statistical Society. Series B (Methodological)},
|
||||||
|
volume = {44},
|
||||||
|
number = {2},
|
||||||
|
eprint = {2345821},
|
||||||
|
eprinttype = {jstor},
|
||||||
|
pages = {139--177},
|
||||||
|
publisher = {[Royal Statistical Society, Oxford University Press]},
|
||||||
|
issn = {0035-9246},
|
||||||
|
url = {https://www.jstor.org/stable/2345821},
|
||||||
|
urldate = {2025-05-07},
|
||||||
|
abstract = {The simplex plays an important role as sample space in many practical situations where compositional data, in the form of proportions of some whole, require interpretation. It is argued that the statistical analysis of such data has proved difficult because of a lack both of concepts of independence and of rich enough parametric classes of distributions in the simplex. A variety of independence hypotheses are introduced and interrelated, and new classes of transformed-normal distributions in the simplex are provided as models within which the independence hypotheses can be tested through standard theory of parametric hypothesis testing. The new concepts and statistical methodology are illustrated by a number of applications.},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-05-07T07:43:38.485Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/S97URH4Y/Aitchison - 1982 - The Statistical Analysis of Compositional Data.pdf}
|
||||||
|
}
|
||||||
|
@online{payneFiniteMixturesMultivariate2023,
|
||||||
|
title = {Finite {{Mixtures}} of {{Multivariate Poisson-Log Normal Factor Analyzers}} for {{Clustering Count Data}}},
|
||||||
|
author = {Payne, Andrea and Silva, Anjali and Rothstein, Steven J. and McNicholas, Paul D. and Subedi, Sanjeena},
|
||||||
|
date = {2023-11-13},
|
||||||
|
eprint = {2311.07762},
|
||||||
|
eprinttype = {arXiv},
|
||||||
|
eprintclass = {stat},
|
||||||
|
doi = {10.48550/arXiv.2311.07762},
|
||||||
|
url = {http://arxiv.org/abs/2311.07762},
|
||||||
|
urldate = {2025-07-02},
|
||||||
|
abstract = {A mixture of multivariate Poisson-log normal factor analyzers is introduced by imposing constraints on the covariance matrix, which resulted in flexible models for clustering purposes. In particular, a class of eight parsimonious mixture models based on the mixtures of factor analyzers model are introduced. Variational Gaussian approximation is used for parameter estimation, and information criteria are used for model selection. The proposed models are explored in the context of clustering discrete data arising from RNA sequencing studies. Using real and simulated data, the models are shown to give favourable clustering performance. The GitHub R package for this work is available at https://github.com/anjalisilva/mixMPLNFA and is released under the open-source MIT license.},
|
||||||
|
pubstate = {prepublished},
|
||||||
|
keywords = {/unread,Statistics - Computation,Statistics - Machine Learning,Statistics - Methodology},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-02T09:31:47.579Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/BXVPEIDD/Payne et al. - 2023 - Finite Mixtures of Multivariate Poisson-Log Normal Factor Analyzers for Clustering Count Data.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/L5DAS5C2/2311.html}
|
||||||
|
}
|
||||||
|
@unpublished{bystrovaCausalDiscovery,
|
||||||
|
title = {Causal Discovery},
|
||||||
|
author = {Bystrova, Daria},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-02T09:34:39.476Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/NQE5DY92/Bystrova - Causal discovery.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@online{braultFastConsistentAlgorithm2023,
|
||||||
|
title = {Fast and {{Consistent Algorithm}} for the {{Latent Block Model}}},
|
||||||
|
author = {Brault, Vincent and Channarond, Antoine},
|
||||||
|
date = {2023-03-09},
|
||||||
|
eprint = {1610.09005},
|
||||||
|
eprinttype = {arXiv},
|
||||||
|
eprintclass = {math},
|
||||||
|
doi = {10.48550/arXiv.1610.09005},
|
||||||
|
url = {http://arxiv.org/abs/1610.09005},
|
||||||
|
urldate = {2025-07-09},
|
||||||
|
abstract = {The latent block model is used to simultaneously rank the rows and columns of a matrix to reveal a block structure. The algorithms used for estimation are often time consuming. However, recent work shows that the log-likelihood ratios are equivalent under the complete and observed (with unknown labels) models and the groups posterior distribution to converge as the size of the data increases to a Dirac mass located at the actual groups configuration. Based on these observations, the algorithm Largest Gaps is proposed in this paper to perform clustering using only the marginals of the matrix, when the number of blocks is very small with respect to the size of the whole matrix in the case of binary data. In addition, a model selection method is incorporated with a proof of its consistency. Thus, this paper shows that studying simplistic configurations (few blocks compared to the size of the matrix or very contrasting blocks) with complex algorithms is useless since the marginals already give very good parameter and classification estimates.},
|
||||||
|
langid = {english},
|
||||||
|
pubstate = {prepublished},
|
||||||
|
keywords = {/unread,Mathematics - Statistics Theory,Statistics - Computation,Statistics - Statistics Theory},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T13:58:53.533Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/5LXC6Y68/Brault et Channarond - 2023 - Fast and Consistent Algorithm for the Latent Block Model.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{braultGeneralisationLalgorithmeLargest,
|
||||||
|
title = {Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique},
|
||||||
|
author = {Brault, Vincent and Channarond, Antoine and Robert, Valérie},
|
||||||
|
abstract = {The latent block model assumes there exists a distribution for each crossing between an object cluster and a variable cluster of a data table ; the cells are supposed to be independent conditionally to the choice of these clusters. To estimate the model parameters, most of algorithms are time consuming. Brault and Channarond (2016) proposed to adapt the Largest Gaps algorithm which consists in using the margins. They thus obtained a procedure which estimates all the model parameters consistently but requires a large number of observations. In this talk, we will extend the procedure to the case of any distribution having a second order moment by using an EM algorithm estimation.},
|
||||||
|
langid = {french},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T12:29:43.098Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/GIUNC4L3/Brault et al. - Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{channarondClassificationEstimationStochastic2012,
|
||||||
|
title = {Classification and Estimation in the {{Stochastic Blockmodel}} Based on the Empirical Degrees},
|
||||||
|
author = {Channarond, Antoine and Daudin, Jean-Jacques and Robin, Stéphane},
|
||||||
|
date = {2012-01-01},
|
||||||
|
journaltitle = {Electronic Journal of Statistics},
|
||||||
|
shortjournal = {Electron. J. Statist.},
|
||||||
|
volume = {6},
|
||||||
|
publisher = {Institute of Mathematical Statistics},
|
||||||
|
issn = {1935-7524},
|
||||||
|
doi = {10.1214/12-ejs753},
|
||||||
|
url = {https://projecteuclid.org/journals/electronic-journal-of-statistics/volume-6/issue-none/Classification-and-estimation-in-the-Stochastic-Blockmodel-based-on-the/10.1214/12-EJS753.full},
|
||||||
|
urldate = {2025-07-09},
|
||||||
|
abstract = {The Stochastic Blockmodel [16] is a mixture model for heterogeneous network data. Unlike the usual statistical framework, new nodes give additional information about the previous ones in this model. Thereby the distribution of the degrees concentrates in points conditionally on the node class. We show under a mild assumption that classification, estimation and model selection can actually be achieved with no more than the empirical degree data. We provide an algorithm able to process very large networks and consistent estimators based on it. In particular, we prove a bound of the probability of misclassification of at least one node, including when the number of classes grows.},
|
||||||
|
issue = {none},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T13:59:33.921Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/8TL8AJ2G/Channarond et al. - 2012 - Classification and estimation in the Stochastic Blockmodel based on the empirical degrees.pdf}
|
||||||
|
}
|
||||||
238
suivi/2026-6/2026-6.qmd
Normal file
|
|
@ -0,0 +1,238 @@
|
||||||
|
---
|
||||||
|
title: "Bilan semaine 6 2026 : 02 février - 06 février"
|
||||||
|
categories: [colBiSBM, inférence, GNN]
|
||||||
|
date: 2026 02 06
|
||||||
|
date-modified: last-modified
|
||||||
|
bibliography: references.bib
|
||||||
|
# from: markdown+latex_macros
|
||||||
|
---
|
||||||
|
{{< include /_macros.tex >}}
|
||||||
|
|
||||||
|
## TODO List
|
||||||
|
|
||||||
|
- Petites opérations sur les OTUs (regarder la matrice dans les yeux):
|
||||||
|
- Ranger les OTUs par variances (i.e. `sd(OTU_j)`)
|
||||||
|
- HMC sur-dispersés (au-dessus bissectrice)
|
||||||
|
- Enterotype phyloseq sous-disp
|
||||||
|
- Regarder la proportion de 1. taxons rares, 2. zéros.
|
||||||
|
- Faire des coupures selon niveaux taxonomiques et regarder si $\Var_{\text{intra}} \approx \Var_{\text{inter}}$
|
||||||
|
- *Bonus*: faire ça dans qmd et voir si forge permet gitlab pages
|
||||||
|
|
||||||
|
- ✅ Faire tourner un LBM sur Human Gut et voir si ça plante : **ça plante, la RAM est surchargée.**
|
||||||
|
- TODO Faire LBM sur niveau taxonomique grossier, initialiser avec le résultat pour un niveau plus fin et ainsi de suite.
|
||||||
|
|
||||||
|
- ✅ Avec blockmodels, codé un LBM-Séquentiel. *Des différences contrastées...*
|
||||||
|
|
||||||
|
- ⌛ Prendre jeu de données exemple de phyloseq :
|
||||||
|
- ✅ 😞 enterotype tourne mais ne donne pas de bons résultats (il semble y avoir deux blocs d'échantillons, mais ils ne sont pas vus par le modèle).
|
||||||
|
- ✅ des jeux de données de Mahendra ne tournent pas (phase forward interminable).
|
||||||
|
|
||||||
|
- Relire @peixotoHierarchicalBlockStructures2014
|
||||||
|
- Regarder les gens qui citent les travaux de Peixoto
|
||||||
|
|
||||||
|
- Implémentation `blockmodels` LBM avec covariables sur proportions (voir @eq-modele-covar-prop)
|
||||||
|
|
||||||
|
:::{.callout-note title="Idées"}
|
||||||
|
|
||||||
|
- Travailler sur Fungus Tree network
|
||||||
|
- 🔍**Demander à PB et SD** : Comparaison covar prop avec GREMLINS multipartite sur (log(dist_phylo), fungus-tree)
|
||||||
|
- Trouver manière de faire un compromis : $\ell(Y,Z,W;\theta) - \lambda d(C(W),C_0)$ avec $C(W)$ le clustering seulement sur la base de la structure LBM et $C_0$ le clustering de l'arbre. Problème $d$ est une distance entre partition, comment optimiser dessus ?
|
||||||
|
- ⌛ Mise à jour partielle des $\tau$ : ce qui pose soucis c'est les gros calculs matriciels (c'est vraiment vrai?). Donc sorte de "stochastic" VEM où on update seulement une partie des $\tau$ à chaque itération. Et échantillonnage stratifié selon l'arbre ?
|
||||||
|
- ⌛ Simulations avec $n_2$ croissant lancées sur Migale
|
||||||
|
- Réimplémenté VE Bernoulli dans colSBM pour Bipartite et début d'implémentation du Stochastic VE. En fait, le problème vient des calculs matriciels $Y\times(\tau^{(1)})^{\top}$ ($n_2^2$), d'où le besoin de sous-échantillonner les noeuds de l'autre dimension à mettre à jour.
|
||||||
|
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
|
- Clustering unipartite j'ai cassé une fonction de distance à vérifier et réparer
|
||||||
|
|
||||||
|
- Codes pour le papier :
|
||||||
|
- Nettoyer les scripts
|
||||||
|
- Faire un joli README
|
||||||
|
- ❓Faire des notebooks
|
||||||
|
|
||||||
|
- Réussir à reproduire résultat de @abramovStructureKnowsBest
|
||||||
|
|
||||||
|
- Maîtriser graph-tool de Peixoto pour essayer d'utiliser l'arbre taxonomique sur le graphe de cooccurrence inféré par SparCC
|
||||||
|
|
||||||
|
- Maitriser SparCC
|
||||||
|
|
||||||
|
- 👶 (délégué à Mona) Clustering sur Doré :
|
||||||
|
- Ajouter Chao1 et 2, colonne par colonne (site par site), et faire indice moyen et la variance.
|
||||||
|
|
||||||
|
### Inférence et microbes
|
||||||
|
|
||||||
|
- ✅ En préparation d'un fichier (réu avec JBL à 10h45 le 06/02/2026).
|
||||||
|
Possible en modifiant lbm.h et sbm.h d'obtenir un modèle utilisant les covariables de groupes (de blocs ?).
|
||||||
|
Car besoin de changer `membership.m_step()` pour mettre à jour $\pmb\pi$ et $\pmb{\rho}$ en utilisant les $\pmb B^{\top}\pmb X$
|
||||||
|
et en renvoyant l'ELBO adaptée.
|
||||||
|
- 😄 Avantage s'inscrit directement dans blockmodels et permet d'avoir toutes les lois d'émissions déjà codées et compatibles !
|
||||||
|
|
||||||
|
- 😢 Besoin de réfléchir à une bonne implémentation.
|
||||||
|
|
||||||
|
#### Modèle avec covariables sur probas d'appartenances aux groupes
|
||||||
|
|
||||||
|
\begin{align*}
|
||||||
|
\pmb{\beta}_{r}& = \begin{pmatrix}
|
||||||
|
\beta_{r,0}\\
|
||||||
|
\vdots\\
|
||||||
|
\beta_{r,p}
|
||||||
|
\end{pmatrix}, & X_{:,j} = \begin{pmatrix}
|
||||||
|
1\\
|
||||||
|
x_{1}\\
|
||||||
|
\vdots\\
|
||||||
|
x_p
|
||||||
|
\end{pmatrix}\\
|
||||||
|
\pmb{\beta}_r^{\top} X_{:,j}& = \beta_{r,0} + \beta_{r,1} x_{1} + \dots + \beta_{r,p} x_p & \approx \log(\rho_r^j) \\
|
||||||
|
\pmb{B} & = \begin{pmatrix}
|
||||||
|
\pmb{\beta}_1 \dots \pmb{\beta}_r \dots \pmb{\beta}_R
|
||||||
|
\end{pmatrix} & \pmb{B}^{\top} X_{:,j} \approx \log(\pmb{\rho}^j) \\
|
||||||
|
\pmb{B}^{\top} \pmb{X} & \approx \log((\pmb{\rho}^j)_{j=1,\dots,n_2}) = \log(\pmb{\Rho})\\
|
||||||
|
\end{align*}
|
||||||
|
Et pour les probas en lignes du LBM
|
||||||
|
\begin{align*}
|
||||||
|
\pmb{\gamma}_{q}& = \begin{pmatrix}
|
||||||
|
\gamma_{q,0}\\
|
||||||
|
\vdots\\
|
||||||
|
\gamma_{q,d}
|
||||||
|
\end{pmatrix}, & V_{:,i} = \begin{pmatrix}
|
||||||
|
1\\
|
||||||
|
v_{1}\\
|
||||||
|
\vdots\\
|
||||||
|
v_d
|
||||||
|
\end{pmatrix}\\
|
||||||
|
\pmb{\gamma}_q^{\top} V_{:,i}& = \gamma_{q,0} + \gamma_{q,1} v_{1} + \dots + \gamma_{q,d} v_d & \approx \log(\pi_q^i) \\
|
||||||
|
\pmb{\Gamma} & = \begin{pmatrix}
|
||||||
|
\pmb{\gamma}_1 \dots \pmb{\gamma}_q \dots \pmb{\gamma}_Q
|
||||||
|
\end{pmatrix} & \pmb{\Gamma}^{\top} V_{:,i} \approx \log(\pmb{\pi}^i) \\
|
||||||
|
\pmb{\Gamma}^{\top} \pmb{V} & \approx \log((\pmb{\pi}^i)_{i=1,\dots,n_1}) = \log(\pmb{\Pi})
|
||||||
|
|
||||||
|
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
#### Note sur l'identifiabilité (par JBL)
|
||||||
|
|
||||||
|
Soient $X : (p+1, n_2), B : (p+1, R)$ avec $X$ de plein rang, i.e., $rg(X) = p+1\implies XX^{\top}$ est inversible.
|
||||||
|
|
||||||
|
On veut qu'il existe $B^{\prime}$ et $B$ avec $B_{:,R} = \vec 0_{p+1}$, par les propriétés de la fonction softmax, $\sigma(.)$ :
|
||||||
|
|
||||||
|
\begin{align*}
|
||||||
|
& \sigma(B^{\top}X) = \sigma({B^{\prime}}^{\top}X)\\
|
||||||
|
& \iff \exists C \in \mathbb{R}^{n_2}, B^{\top} X = {B^{\prime}}^{\top} X + \pmb{1}_R C^{\top}\\
|
||||||
|
& \iff \exists C \in \mathbb{R}^{n_2}, B^{\top} X - \pmb{1}_R C^{\top} = {B^{\prime}}^{\top} X\\
|
||||||
|
& \iff \exists C \in \mathbb{R}^{n_2}, (B^{\top} X - \pmb{1}_R C^{\top}) X^{\top} = {B^{\prime}}^{\top} X X^{\top}\\
|
||||||
|
& \iff \exists C \in \mathbb{R}^{n_2}, (B^{\top} X - \pmb{1}_R C^{\top}) X^{\top}(X X^{\top})^{-1} = {B^{\prime}}^{\top}\\
|
||||||
|
|
||||||
|
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
#### Description du modèle hiérarchique
|
||||||
|
|
||||||
|
Toujours modèle LBM mais avec probas d'appartenance pour les colonnes variables:
|
||||||
|
|
||||||
|
\begin{align*}
|
||||||
|
Z_i &\sim \mathcal{M}(1; \pi_1, \dots, \pi_Q), \sum_{q=1}^{Q} \pi_q = 1\\
|
||||||
|
W_j &\sim \mathcal{M}(1; \rho_1^j, \dots, \rho_R^j), \sum_{r=1}^{R} \rho_r^j = 1\\
|
||||||
|
Y_{i,j}&\mid Z_i = q, W_j = r \sim \mathcal{F}(\alpha_{qr})
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
Inférence variationnelle donc $\ell(Y;\pmb{\theta}) \geq \mathcal{J}(\mathcal{R},\pmb{\theta})$ avec
|
||||||
|
|
||||||
|
$$
|
||||||
|
\ELBORTheta = \sum_{i = 1}^{n_1}\sum_{j=1}^{n_2}\sum_{q \in \mathcal{Q}_1} \sum_{r \in \mathcal{Q}_2} \tau_{iq}^{1} \tau_{jr}^{2} \log f(Y_{ij}; \alpha_{qr})
|
||||||
|
+ \sum_{i=1}^{n_1} \sum_{q \in \mathcal{Q}_1} \tau_{iq}^{1} \log \pi_{\color{black}q} + \sum_{j=1}^{n_2} \sum_{r \in \mathcal{Q}_2} \tau_{jr}^{2} \log \rho_{\color{black}r}^{j} \\
|
||||||
|
- \sum_{i=1}^{n_1} \sum_{q \in \mathcal{Q}_1} \tau_{iq}^{1} \log \tau_{iq}^{1} - \sum_{j=1}^{n_2} \sum_{r \in \mathcal{Q}_2} \tau_{jr}^{2} \log \tau_{jr}^{2}
|
||||||
|
$$
|
||||||
|
|
||||||
|
##### Modèle Sophie
|
||||||
|
|
||||||
|
Avec $\rho_r^j = \frac{\exp{\beta_r X_j}}{\sum_{s=1}^{R} \exp{\beta_s X_j}} = \sigma(\pmb{\beta} \pmb{X})_{r,j}$, où $\sigma$ désigne le softmax. Mais il y a besoin de poser une contrainte sur l'un des $(\beta_r)_{r=1,\dots,R}$, ici $\beta_R = 0$.
|
||||||
|
|
||||||
|
La partie pertinente de l'ELBO devient:
|
||||||
|
$$
|
||||||
|
P((\beta_r)_{r=1,\dots,R}, (X_j)_{j=1,\dots,n_2}, (\tau_{jr})_{\substack{j=1,\dots,n_2\\r=1,\dots,R}} ) = \sum_{j=1}^{n_2} \sum_{r=1}^{R} [\tau_{jr} (\beta_r X_j - \log (\sum_{s=1}^{R} \exp{\beta_s X_j}))]
|
||||||
|
$${#eq-modele-covar-prop}
|
||||||
|
|
||||||
|
Et on obtient la dérivée partielle par rapport à $\beta_t$ comme:
|
||||||
|
\begin{align*}
|
||||||
|
\dfrac{\partial P}{\partial \beta_t}&((\beta_r)_{r=1,\dots,R}, (X_j)_{j=1,\dots,n_2}, (\tau_{jr})_{\substack{j=1,\dots,n_2\\r=1,\dots,R}} ) = \sum_{j=1}^{n_2} \biggl[ \tau_{jt} X_j - \frac{X_j \exp{\beta_t X_j}}{\sum_{s=1}^{R} \exp{\beta_s X_j}} \biggr]\\
|
||||||
|
& = \sum_{j=1}^{n_2} \biggl[\bigl(\tau_{jt} - \sigma(\pmb{\beta} \pmb{X})_{t,j}\bigr) X_j\biggr] = \sum_{j=1}^{n_2} \biggl[\bigl(\tau_{jt} - \rho_t^j \bigr) X_j\biggr]
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
#### Bibliographie: à lire, à faire
|
||||||
|
|
||||||
|
- Lire article multi-niveaux Saint-Clair
|
||||||
|
- 🆕 🔎 Trouver des papiers:
|
||||||
|
- LBM Negative Binomial
|
||||||
|
- Network inference through sample comparison
|
||||||
|
|
||||||
|
- Idée des groupes sur la base de distance phylogénétique:
|
||||||
|
- En train de comprendre les distances que phyloseq permet de calculer sur notre exemple
|
||||||
|
- En train de lire sur Principal coordinate analysis : https://openplantpathology.github.io/OPP_Workshop_Multivariate/2-MV_PCO.html
|
||||||
|
- Parametric t-SNE pour avoir une unique représentation latente (inconvénient utilise du Deep Learning)
|
||||||
|
- Lire Papier UniFrac
|
||||||
|
|
||||||
|
#### Réflexion
|
||||||
|
|
||||||
|
- easy16s : se renseigner sur
|
||||||
|
- $\alpha$, $\beta$ diversité
|
||||||
|
- Heatmap
|
||||||
|
- Regarder **SPARTA** Rennes
|
||||||
|
- Ecrire et étudier les modèles pour différents niveaux taxonomiques.
|
||||||
|
- 🆕 Regarder NetComi
|
||||||
|
- 🆕 Regarder OneNet car agrégation plus robuste
|
||||||
|
- 🆕 Réfléchir au sens d'agréger les données ou de les diviser
|
||||||
|
|
||||||
|
#### Écrire et faire tourner
|
||||||
|
|
||||||
|
- Lancer *colBiSBM* sur $OTU\times Sample$ → problème du chargement en mémoire des données à voir
|
||||||
|
- Lancer *colSBM* sur $OTU\times OTU$
|
||||||
|
- TabNet pratiquer les [exercices](https://github.com/cregouby/Tutoriel_torch)
|
||||||
|
- 🆕 SparCC à différents niveaux
|
||||||
|
- 🆕 SBM à différents niveaux
|
||||||
|
- 🆕⌛ Tree-PLN à différents niveaux
|
||||||
|
|
||||||
|
|
||||||
|
#### Causalité
|
||||||
|
|
||||||
|
Plus sur le temps long, à regarder
|
||||||
|
|
||||||
|
- GT causalité
|
||||||
|
- Daria Bystrova lire présentation @bystrovaCausalDiscovery (Meek rules, V-structure)
|
||||||
|
|
||||||
|
## A discuter
|
||||||
|
|
||||||
|
- 🆕 Voir pour des Réseaux / GDR où aller
|
||||||
|
- 🆕 Chercher des cours à suivre
|
||||||
|
|
||||||
|
## Biblio à faire
|
||||||
|
|
||||||
|
- Regarder Transport optimal pour graphes bipartites.
|
||||||
|
|
||||||
|
|
||||||
|
## Lectures en cours 📚
|
||||||
|
|
||||||
|
### HDR Vincent Brault
|
||||||
|
|
||||||
|
- ⌛ Chap 2 : Creuser l'idée de maximiser l'énergie libre, très intéressant regarder le critère CARI et lire Robert et al 2021. Actuellement p32 du manuscrit
|
||||||
|
- Chap 3
|
||||||
|
|
||||||
|
### OT
|
||||||
|
- ⌛ @mazeletUnsupervisedLearningOptimal Intéressant pour le transport optimal entre graphes de tailles différentes | Regarder si la régularisation entropique ne marche pas bien pour le graphe.
|
||||||
|
- ⌛ @nennaLecture2Entropic Pour comprendre le problème d'OT régularisé par l'entropie.
|
||||||
|
- ⌛ @nennaLecture1Monge
|
||||||
|
|
||||||
|
### Inférence de graphes
|
||||||
|
|
||||||
|
- ⌛ @aitchisonStatisticalAnalysisCompositional1982a, en cours
|
||||||
|
|
||||||
|
- ❗📖 @payneFiniteMixturesMultivariate2023 sur MixMPLN
|
||||||
|
|
||||||
|
### Causalité
|
||||||
|
|
||||||
|
- ❗📖 @bystrovaCausalDiscovery
|
||||||
|
|
||||||
|
### Largest Gaps
|
||||||
|
|
||||||
|
- ❗📖 @braultFastConsistentAlgorithm2023
|
||||||
|
- ❗📖 @channarondClassificationEstimationStochastic2012 le papier qui introduit le *Largest Gaps*
|
||||||
176
suivi/2026-6/references.bib
Normal file
|
|
@ -0,0 +1,176 @@
|
||||||
|
@article{mazeletUnsupervisedLearningOptimal,
|
||||||
|
title = {Unsupervised {{Learning}} for {{Optimal Transport}} Plan Prediction between Unbalanced Graphs},
|
||||||
|
author = {Mazelet, Sonia and Flamary, Rémi and Thirion, Bertrand},
|
||||||
|
abstract = {Optimal transport between graphs, based on Gromov-Wasserstein and other extensions, is a powerful tool for comparing and aligning graph structures. However, solving the associated non-convex optimization problems is computationally expensive, which limits the scalability of these methods to large graphs. In this work, we present Unbalanced Learning of Optimal Transport (ULOT), a deep learning method that predicts optimal transport plans between two graphs. Our method is trained by minimizing the fused unbalanced Gromov-Wasserstein (FUGW) loss. We propose a novel neural architecture with cross-attention that is conditioned on the FUGW tradeoff hyperparameters. We evaluate ULOT on synthetic stochastic block model (SBM) graphs and on real cortical surface data obtained from fMRI. ULOT predicts transport plans with competitive loss up to two orders of magnitude faster than classical solvers. Furthermore, the predicted plan can be used as a warm start for classical solvers to accelerate their convergence. Finally, the predicted transport plan is fully differentiable with respect to the graph inputs and FUGW hyperparameters, enabling the optimization of functionals of the ULOT plan.},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-11T09:08:09.864Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/HPZEYMM9/Mazelet et al. - Unsupervised Learning for Optimal Transport plan prediction between unbalanced graphs.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{peixotoHierarchicalBlockStructures2014,
|
||||||
|
title = {Hierarchical {{Block Structures}} and {{High-Resolution Model Selection}} in {{Large Networks}}},
|
||||||
|
author = {Peixoto, Tiago P.},
|
||||||
|
year = 2014,
|
||||||
|
month = mar,
|
||||||
|
journal = {Physical Review X},
|
||||||
|
volume = {4},
|
||||||
|
number = {1},
|
||||||
|
pages = {011047},
|
||||||
|
issn = {2160-3308},
|
||||||
|
doi = {10.1103/PhysRevX.4.011047},
|
||||||
|
urldate = {2025-09-26},
|
||||||
|
copyright = {http://creativecommons.org/licenses/by/3.0/},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-09-26T08:27:38.586Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/R58ZQK8J/Peixoto - 2014 - Hierarchical Block Structures and High-Resolution Model Selection in Large Networks.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@article{abramovStructureKnowsBest,
|
||||||
|
title = {Structure Knows Best: Predicting Ecological Interactions across Space through Pairwise Integration of Latent Network Patterns},
|
||||||
|
author = {Abramov, Kesem and Biton, Barry and Galai, Geut and Puzis, Rami and Pilosof, Shai},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-12-01T08:50:29.812Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/DX44VXQA/Abramov et al. - Structure knows best predicting ecological interactions across space through pairwise integration o.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@article{nennaLecture2Entropic,
|
||||||
|
title = {Lecture 2: {{Entropic Optimal Transport}}},
|
||||||
|
author = {Nenna, Luca},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-11T16:06:28.547Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/WGFIISDB/Nenna - Lecture 2 Entropic Optimal Transport.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{nennaLecture1Monge,
|
||||||
|
title = {Lecture 1 {{Monge}} and {{Kantorovich}} Problems: From Primal to Dual},
|
||||||
|
author = {Nenna, Luca},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-13T09:24:13.832Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/7LVQPD6D/Nenna - Lecture 1 Monge and Kantorovich problems from primal to dual.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{Morton2021.11.09.467939,
|
||||||
|
title = {Scalable Estimation of Microbial Co-Occurrence Networks with {{Variational Autoencoders}}},
|
||||||
|
author = {Morton, James T. and Silverman, Justin and Tikhonov, Gleb and Lähdesmäki, Harri and Bonneau, Rich},
|
||||||
|
date = {2021},
|
||||||
|
journaltitle = {bioRxiv : the preprint server for biology},
|
||||||
|
shortjournal = {bioRxiv},
|
||||||
|
eprint = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939.full.pdf},
|
||||||
|
publisher = {Cold Spring Harbor Laboratory},
|
||||||
|
doi = {10.1101/2021.11.09.467939},
|
||||||
|
url = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939},
|
||||||
|
abstract = {Estimating microbe-microbe interactions is critical for understanding the ecological laws governing microbial communities. Rapidly decreasing sequencing costs have promised new opportunities to estimate microbe-microbe interactions across thousands of uncultured, unknown microbes. However, typical microbiome datasets are very high dimensional and accurate estimation of microbial correlations requires tens of thousands of samples, exceeding the computational capabilities of existing methodologies. Furthermore, the vast majority of microbiome studies collect compositional metagenomics data which enforces a negative bias when computing microbe-microbe correlations. The Multinomial Logistic Normal (MLN) distribution has been shown to be effective at inferring microbe-microbe correlations, however scalable Bayesian inference of these distributions has remained elusive. Here, we show that carefully constructed Variational Autoencoders (VAEs) augmented with the Isometric Log-ratio (ILR) transform can estimate low-rank MLN distributions thousands of times faster than existing methods. These VAEs can be trained on tens of thousands of samples, enabling co-occurrence inference across tens of thousands of microbes without regularization. The latent embedding distances computed from these VAEs are competitive with existing beta-diversity methods across a variety of mouse and human microbiome classification and regression tasks, with notable improvements on longitudinal studies.Competing Interest StatementThe authors have declared no competing interest.},
|
||||||
|
elocation-id = {2021.11.09.467939},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-30T14:17:29.518Z}
|
||||||
|
}
|
||||||
|
@article{aitchisonStatisticalAnalysisCompositional1982a,
|
||||||
|
title = {The {{Statistical Analysis}} of {{Compositional Data}}},
|
||||||
|
author = {Aitchison, J.},
|
||||||
|
date = {1982},
|
||||||
|
journaltitle = {Journal of the Royal Statistical Society. Series B (Methodological)},
|
||||||
|
volume = {44},
|
||||||
|
number = {2},
|
||||||
|
eprint = {2345821},
|
||||||
|
eprinttype = {jstor},
|
||||||
|
pages = {139--177},
|
||||||
|
publisher = {[Royal Statistical Society, Oxford University Press]},
|
||||||
|
issn = {0035-9246},
|
||||||
|
url = {https://www.jstor.org/stable/2345821},
|
||||||
|
urldate = {2025-05-07},
|
||||||
|
abstract = {The simplex plays an important role as sample space in many practical situations where compositional data, in the form of proportions of some whole, require interpretation. It is argued that the statistical analysis of such data has proved difficult because of a lack both of concepts of independence and of rich enough parametric classes of distributions in the simplex. A variety of independence hypotheses are introduced and interrelated, and new classes of transformed-normal distributions in the simplex are provided as models within which the independence hypotheses can be tested through standard theory of parametric hypothesis testing. The new concepts and statistical methodology are illustrated by a number of applications.},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-05-07T07:43:38.485Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/S97URH4Y/Aitchison - 1982 - The Statistical Analysis of Compositional Data.pdf}
|
||||||
|
}
|
||||||
|
@online{payneFiniteMixturesMultivariate2023,
|
||||||
|
title = {Finite {{Mixtures}} of {{Multivariate Poisson-Log Normal Factor Analyzers}} for {{Clustering Count Data}}},
|
||||||
|
author = {Payne, Andrea and Silva, Anjali and Rothstein, Steven J. and McNicholas, Paul D. and Subedi, Sanjeena},
|
||||||
|
date = {2023-11-13},
|
||||||
|
eprint = {2311.07762},
|
||||||
|
eprinttype = {arXiv},
|
||||||
|
eprintclass = {stat},
|
||||||
|
doi = {10.48550/arXiv.2311.07762},
|
||||||
|
url = {http://arxiv.org/abs/2311.07762},
|
||||||
|
urldate = {2025-07-02},
|
||||||
|
abstract = {A mixture of multivariate Poisson-log normal factor analyzers is introduced by imposing constraints on the covariance matrix, which resulted in flexible models for clustering purposes. In particular, a class of eight parsimonious mixture models based on the mixtures of factor analyzers model are introduced. Variational Gaussian approximation is used for parameter estimation, and information criteria are used for model selection. The proposed models are explored in the context of clustering discrete data arising from RNA sequencing studies. Using real and simulated data, the models are shown to give favourable clustering performance. The GitHub R package for this work is available at https://github.com/anjalisilva/mixMPLNFA and is released under the open-source MIT license.},
|
||||||
|
pubstate = {prepublished},
|
||||||
|
keywords = {/unread,Statistics - Computation,Statistics - Machine Learning,Statistics - Methodology},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-02T09:31:47.579Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/BXVPEIDD/Payne et al. - 2023 - Finite Mixtures of Multivariate Poisson-Log Normal Factor Analyzers for Clustering Count Data.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/L5DAS5C2/2311.html}
|
||||||
|
}
|
||||||
|
@unpublished{bystrovaCausalDiscovery,
|
||||||
|
title = {Causal Discovery},
|
||||||
|
author = {Bystrova, Daria},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-02T09:34:39.476Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/NQE5DY92/Bystrova - Causal discovery.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@online{braultFastConsistentAlgorithm2023,
|
||||||
|
title = {Fast and {{Consistent Algorithm}} for the {{Latent Block Model}}},
|
||||||
|
author = {Brault, Vincent and Channarond, Antoine},
|
||||||
|
date = {2023-03-09},
|
||||||
|
eprint = {1610.09005},
|
||||||
|
eprinttype = {arXiv},
|
||||||
|
eprintclass = {math},
|
||||||
|
doi = {10.48550/arXiv.1610.09005},
|
||||||
|
url = {http://arxiv.org/abs/1610.09005},
|
||||||
|
urldate = {2025-07-09},
|
||||||
|
abstract = {The latent block model is used to simultaneously rank the rows and columns of a matrix to reveal a block structure. The algorithms used for estimation are often time consuming. However, recent work shows that the log-likelihood ratios are equivalent under the complete and observed (with unknown labels) models and the groups posterior distribution to converge as the size of the data increases to a Dirac mass located at the actual groups configuration. Based on these observations, the algorithm Largest Gaps is proposed in this paper to perform clustering using only the marginals of the matrix, when the number of blocks is very small with respect to the size of the whole matrix in the case of binary data. In addition, a model selection method is incorporated with a proof of its consistency. Thus, this paper shows that studying simplistic configurations (few blocks compared to the size of the matrix or very contrasting blocks) with complex algorithms is useless since the marginals already give very good parameter and classification estimates.},
|
||||||
|
langid = {english},
|
||||||
|
pubstate = {prepublished},
|
||||||
|
keywords = {/unread,Mathematics - Statistics Theory,Statistics - Computation,Statistics - Statistics Theory},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T13:58:53.533Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/5LXC6Y68/Brault et Channarond - 2023 - Fast and Consistent Algorithm for the Latent Block Model.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{braultGeneralisationLalgorithmeLargest,
|
||||||
|
title = {Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique},
|
||||||
|
author = {Brault, Vincent and Channarond, Antoine and Robert, Valérie},
|
||||||
|
abstract = {The latent block model assumes there exists a distribution for each crossing between an object cluster and a variable cluster of a data table ; the cells are supposed to be independent conditionally to the choice of these clusters. To estimate the model parameters, most of algorithms are time consuming. Brault and Channarond (2016) proposed to adapt the Largest Gaps algorithm which consists in using the margins. They thus obtained a procedure which estimates all the model parameters consistently but requires a large number of observations. In this talk, we will extend the procedure to the case of any distribution having a second order moment by using an EM algorithm estimation.},
|
||||||
|
langid = {french},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T12:29:43.098Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/GIUNC4L3/Brault et al. - Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{channarondClassificationEstimationStochastic2012,
|
||||||
|
title = {Classification and Estimation in the {{Stochastic Blockmodel}} Based on the Empirical Degrees},
|
||||||
|
author = {Channarond, Antoine and Daudin, Jean-Jacques and Robin, Stéphane},
|
||||||
|
date = {2012-01-01},
|
||||||
|
journaltitle = {Electronic Journal of Statistics},
|
||||||
|
shortjournal = {Electron. J. Statist.},
|
||||||
|
volume = {6},
|
||||||
|
publisher = {Institute of Mathematical Statistics},
|
||||||
|
issn = {1935-7524},
|
||||||
|
doi = {10.1214/12-ejs753},
|
||||||
|
url = {https://projecteuclid.org/journals/electronic-journal-of-statistics/volume-6/issue-none/Classification-and-estimation-in-the-Stochastic-Blockmodel-based-on-the/10.1214/12-EJS753.full},
|
||||||
|
urldate = {2025-07-09},
|
||||||
|
abstract = {The Stochastic Blockmodel [16] is a mixture model for heterogeneous network data. Unlike the usual statistical framework, new nodes give additional information about the previous ones in this model. Thereby the distribution of the degrees concentrates in points conditionally on the node class. We show under a mild assumption that classification, estimation and model selection can actually be achieved with no more than the empirical degree data. We provide an algorithm able to process very large networks and consistent estimators based on it. In particular, we prove a bound of the probability of misclassification of at least one node, including when the number of classes grows.},
|
||||||
|
issue = {none},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T13:59:33.921Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/8TL8AJ2G/Channarond et al. - 2012 - Classification and estimation in the Stochastic Blockmodel based on the empirical degrees.pdf}
|
||||||
|
}
|
||||||
226
suivi/2026-7/2026-7.qmd
Normal file
|
|
@ -0,0 +1,226 @@
|
||||||
|
---
|
||||||
|
title: "Bilan semaine 7 2026 : 09 février - 13 février"
|
||||||
|
categories: [colBiSBM, inférence, GNN, covariables, identifiabilité]
|
||||||
|
date: 2026 02 13
|
||||||
|
date-modified: last-modified
|
||||||
|
bibliography: references.bib
|
||||||
|
# from: markdown+latex_macros
|
||||||
|
---
|
||||||
|
{{< include /_macros.tex >}}
|
||||||
|
|
||||||
|
## TODO List
|
||||||
|
|
||||||
|
- Petites opérations sur les OTUs (regarder la matrice dans les yeux):
|
||||||
|
- Ranger les OTUs par variances (i.e. `sd(OTU_j)`)
|
||||||
|
- HMC sur-dispersés (au-dessus bissectrice)
|
||||||
|
- Enterotype phyloseq sous-disp
|
||||||
|
- Regarder la proportion de 1. taxons rares, 2. zéros.
|
||||||
|
- Faire des coupures selon niveaux taxonomiques et regarder si $\Var_{\text{intra}} \approx \Var_{\text{inter}}$
|
||||||
|
- *Bonus*: faire ça dans qmd et voir si forge permet gitlab pages
|
||||||
|
|
||||||
|
|
||||||
|
- ✅ Avec blockmodels, codé un LBM-Séquentiel. *Des différences contrastées...*
|
||||||
|
- TODO Ajouter lien vers notebooks résultats
|
||||||
|
|
||||||
|
- Relire @peixotoHierarchicalBlockStructures2014
|
||||||
|
- Regarder les gens qui citent les travaux de Peixoto
|
||||||
|
|
||||||
|
- ⌛ **En cours** Implémentation `blockmodels` LBM avec covariables sur proportions (voir @eq-modele-covar-prop)
|
||||||
|
|
||||||
|
:::{.callout-note title="Idées"}
|
||||||
|
|
||||||
|
- Travailler sur Fungus Tree network
|
||||||
|
- ⌛**Demander à PB et SD**, ils regardent : Comparaison covar prop avec GREMLINS multipartite sur (log(dist_phylo), fungus-tree)
|
||||||
|
- Trouver manière de faire un compromis : $\ell(Y,Z,W;\theta) - \lambda d(C(W),C_0)$ avec $C(W)$ le clustering seulement sur la base de la structure LBM et $C_0$ le clustering de l'arbre. Problème $d$ est une distance entre partition, comment optimiser dessus ?
|
||||||
|
- ⌛ Mise à jour partielle des $\tau$ : ce qui pose soucis c'est les gros calculs matriciels (c'est vraiment vrai?). Donc sorte de "stochastic" VEM où on update seulement une partie des $\tau$ à chaque itération. Et échantillonnage stratifié selon l'arbre ?
|
||||||
|
- ⌛ Simulations avec $n_2$ croissant lancée sur Migale
|
||||||
|
- Réimplémenté VE Bernoulli dans colSBM pour Bipartite et début implémentation Stochastic VE. En fait, le problème vient des calculs matriciels $Y\times(\tau^{(1)})^{\top}$ ($n_2^2$), donc besoin de sous-échantillonner les noeuds de l'autre dimension à mettre à jour.
|
||||||
|
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
|
- Clustering unipartite j'ai cassé une fonction de distance à vérifier et réparer
|
||||||
|
|
||||||
|
- Codes pour le papier :
|
||||||
|
- Nettoyer les scripts
|
||||||
|
- Faire un joli README
|
||||||
|
- ❓Faire des notebooks
|
||||||
|
|
||||||
|
- Réussir à reproduire résultat de @abramovStructureKnowsBest
|
||||||
|
|
||||||
|
- Maîtriser graph-tool de Peixoto pour essayer d'utiliser l'arbre taxonomique sur le graphe de cooccurrence inféré par SparCC
|
||||||
|
|
||||||
|
- Maitriser SparCC
|
||||||
|
|
||||||
|
- 👶 (délégué à Mona) Clustering sur Doré :
|
||||||
|
- Ajouter Chao1 et 2, colonne par colonne (site par site), et faire indice moyen et la variance.
|
||||||
|
|
||||||
|
### Inférence et microbes
|
||||||
|
|
||||||
|
- ✅ En préparation d'un fichier (réu avec JBL à 10h45 le 06/02/2026).
|
||||||
|
Possible en modifiant lbm.h et sbm.h d'obtenir un modèle utilisant les covariables de groupes (de blocs ?).
|
||||||
|
Car besoin de changer `membership.m_step()` pour mettre à jour $\pmb\pi$ et $\pmb{\rho}$ en utilisant les $\pmb B^{\top}\pmb X$
|
||||||
|
et en renvoyant l'ELBO adaptée.
|
||||||
|
- 😄 Avantage s'inscrit directement dans blockmodels et permet d'avoir toutes les lois d'émissions déjà codées et compatibles !
|
||||||
|
|
||||||
|
- 😢 Besoin de réfléchir à une bonne implémentation.
|
||||||
|
|
||||||
|
#### Modèle avec covariables sur probas d'appartenances aux groupes
|
||||||
|
|
||||||
|
\begin{align*}
|
||||||
|
\pmb{\beta}_{r}& = \begin{pmatrix}
|
||||||
|
\beta_{r,0}\\
|
||||||
|
\vdots\\
|
||||||
|
\beta_{r,p}
|
||||||
|
\end{pmatrix}, & X_{j,\bullet} = \begin{pmatrix}
|
||||||
|
1 = x_{0,j} & x_{1,j} & \dots & x_{p,j}
|
||||||
|
\end{pmatrix}\\
|
||||||
|
X_{j,\bullet} \pmb{\beta}_r& = \beta_{r,0} x_{0,j} + \beta_{r,1} x_{1,j} + \dots + \beta_{r,p} x_{p,j} & \approx \log(\rho_r^j) \\
|
||||||
|
B & = \begin{pmatrix}
|
||||||
|
\pmb{\beta}_1 \dots \pmb{\beta}_r \dots \pmb{\beta}_R
|
||||||
|
\end{pmatrix} & X_{j,\bullet}B \approx \log(\pmb{\rho}^j) \\
|
||||||
|
X B & \approx \log((\pmb{\rho}^j)_{j=1,\dots,n_2}) = \log(\pmb{\Rho})\\
|
||||||
|
\end{align*}
|
||||||
|
Et pour les probas en lignes du LBM
|
||||||
|
\begin{align*}
|
||||||
|
\pmb{\gamma}_{q}& = \begin{pmatrix}
|
||||||
|
\gamma_{q,0}\\
|
||||||
|
\vdots\\
|
||||||
|
\gamma_{q,d}
|
||||||
|
\end{pmatrix}, & V_{i,\bullet} = \begin{pmatrix}
|
||||||
|
1 = v_{0,i} & v_{1,i} & \dots & v_{d,i}
|
||||||
|
\end{pmatrix}\\
|
||||||
|
V_{i,\bullet} \pmb{\gamma}_q & = \gamma_{q,0} v_{0,i} + \gamma_{q,1} v_{1,i} + \dots + \gamma_{q,d} v_{d,i} & \approx \log(\pi_q^i) \\
|
||||||
|
\Gamma & = \begin{pmatrix}
|
||||||
|
\pmb{\gamma}_1 \dots \pmb{\gamma}_q \dots \pmb{\gamma}_Q
|
||||||
|
\end{pmatrix} & V_{i,\bullet} \Gamma \approx \log(\pmb{\pi}^i) \\
|
||||||
|
V \Gamma & \approx \log((\pmb{\pi}^i)_{i=1,\dots,n_1}) = \log(\pmb{\Pi})
|
||||||
|
|
||||||
|
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
#### Preuve sur l'identifiabilité
|
||||||
|
Soient $B,B^{\prime}$ avec $B_{\bullet,R} = B^{\prime}_{\bullet,R} = \vec{0}_{p+1}$ et $X$ de rang plein tel que $X^{\top}X$ soit inversible.
|
||||||
|
|
||||||
|
\begin{align*}
|
||||||
|
&\sigma(XB) = \sigma(XB^{\prime})\\
|
||||||
|
&\implies \exists C = \begin{pmatrix}c_1 \\ \vdots \\ c_j \\ \vdots \\ c_{n_2}\end{pmatrix} \in \mathbb{R}^{n_2}, X B = X B^{\prime} + C \pmb{1}_{R}^{\top} \\
|
||||||
|
&\implies \exists C \in \mathbb{R}^{n_2}, (X B)_{j,r} = (X B^{\prime})_{j,r} + (C \pmb{1}_{R}^{\top})_{j,r} \\
|
||||||
|
&\implies \exists C \in \mathbb{R}^{n_2}, \forall r\in\{1,\dots,R\}, \forall j\in\{1,\dots,n_2\}, \sum_{k=1}^{p+1} x_{j,k} \beta_{k,r} = \sum_{k=1}^{p+1} x_{j,k} \beta^{\prime}_{k,r} + c_j\\
|
||||||
|
&\implies \exists C \in \mathbb{R}^{n_2}, \forall j\in\{1,\dots,n_2\}, \sum_{k=1}^{p+1} x_{j,k} \beta_{k,R} = \sum_{k=1}^{p+1} x_{j,k} \beta^{\prime}_{k,R} + c_j \\
|
||||||
|
&\implies \exists C \in \mathbb{R}^{n_2}, \forall j\in\{1,\dots,n_2\}, \sum_{k=1}^{p+1} x_{j,k} \times 0 = \sum_{k=1}^{p+1} x_{j,k} \times 0 + c_j \\
|
||||||
|
&\implies \exists C \in \mathbb{R}^{n_2}, \forall j\in\{1,\dots,n_2\}, 0 = 0 + c_j \implies c_j = 0 \\
|
||||||
|
&\implies C = \begin{pmatrix} 0 \\ \vdots \\ 0 \end{pmatrix} \text{ et donc } XB = XB^{\prime} \\
|
||||||
|
& \implies (X^{\top} X)^{-1}X^{\top} X B = (X^{\top} X)^{-1}X^{\top} X B^{\prime} \implies B=B^{\prime}
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
#### Description du modèle hiérarchique
|
||||||
|
|
||||||
|
Toujours modèle LBM mais avec probas d'appartenance pour les colonnes variables:
|
||||||
|
|
||||||
|
\begin{align*}
|
||||||
|
Z_i &\sim \mathcal{M}(1; \pi_1, \dots, \pi_Q), \sum_{q=1}^{Q} \pi_q = 1\\
|
||||||
|
W_j &\sim \mathcal{M}(1; \rho_1^j, \dots, \rho_R^j), \sum_{r=1}^{R} \rho_r^j = 1\\
|
||||||
|
Y_{i,j}&\mid Z_i = q, W_j = r \sim \mathcal{F}(\alpha_{qr})
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
Inférence variationnelle donc $\ell(Y;\pmb{\theta}) \geq \mathcal{J}(\mathcal{R},\pmb{\theta})$ avec
|
||||||
|
|
||||||
|
$$
|
||||||
|
\ELBORTheta = \sum_{i = 1}^{n_1}\sum_{j=1}^{n_2}\sum_{q \in \mathcal{Q}_1} \sum_{r \in \mathcal{Q}_2} \tau_{iq}^{1} \tau_{jr}^{2} \log f(Y_{ij}; \alpha_{qr})
|
||||||
|
+ \sum_{i=1}^{n_1} \sum_{q \in \mathcal{Q}_1} \tau_{iq}^{1} \log \pi_{\color{black}q} + \sum_{j=1}^{n_2} \sum_{r \in \mathcal{Q}_2} \tau_{jr}^{2} \log \rho_{\color{black}r}^{j} \\
|
||||||
|
- \sum_{i=1}^{n_1} \sum_{q \in \mathcal{Q}_1} \tau_{iq}^{1} \log \tau_{iq}^{1} - \sum_{j=1}^{n_2} \sum_{r \in \mathcal{Q}_2} \tau_{jr}^{2} \log \tau_{jr}^{2}
|
||||||
|
$$
|
||||||
|
|
||||||
|
##### Modèle Sophie
|
||||||
|
|
||||||
|
Avec $\rho_r^j = \frac{\exp{\beta_r X_j}}{\sum_{s=1}^{R} \exp{\beta_s X_j}} = \sigma(\pmb{\beta} \pmb{X})_{r,j}$, où $\sigma$ désigne le softmax. Mais il y a besoin de poser une contrainte sur l'un des $(\beta_r)_{r=1,\dots,R}$, ici $\beta_R = 0$.
|
||||||
|
|
||||||
|
La partie pertinente de l'ELBO devient:
|
||||||
|
$$
|
||||||
|
P((\beta_r)_{r=1,\dots,R}, (X_j)_{j=1,\dots,n_2}, (\tau_{jr})_{\substack{j=1,\dots,n_2\\r=1,\dots,R}} ) = \sum_{j=1}^{n_2} \sum_{r=1}^{R} [\tau_{jr} (\beta_r X_j - \log (\sum_{s=1}^{R} \exp{\beta_s X_j}))]
|
||||||
|
$${#eq-modele-covar-prop}
|
||||||
|
|
||||||
|
Et on obtient la dérivée partielle par rapport à $\beta_t$ comme:
|
||||||
|
\begin{align*}
|
||||||
|
\dfrac{\partial P}{\partial \beta_t}&((\beta_r)_{r=1,\dots,R}, (X_j)_{j=1,\dots,n_2}, (\tau_{jr})_{\substack{j=1,\dots,n_2\\r=1,\dots,R}} ) = \sum_{j=1}^{n_2} \biggl[ \tau_{jt} X_j - \frac{X_j \exp{\beta_t X_j}}{\sum_{s=1}^{R} \exp{\beta_s X_j}} \biggr]\\
|
||||||
|
& = \sum_{j=1}^{n_2} \biggl[\bigl(\tau_{jt} - \sigma(\pmb{\beta} \pmb{X})_{t,j}\bigr) X_j\biggr] = \sum_{j=1}^{n_2} \biggl[\bigl(\tau_{jt} - \rho_t^j \bigr) X_j\biggr]
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
#### Bibliographie: à lire, à faire
|
||||||
|
|
||||||
|
- Lire article multi-niveaux Saint-Clair
|
||||||
|
- 🆕 🔎 Trouver des papiers:
|
||||||
|
- LBM Negative Binomial
|
||||||
|
- Network inference through sample comparison
|
||||||
|
|
||||||
|
- Idée des groupes sur la base de distance phylogénétique:
|
||||||
|
- En train de comprendre les distances que phyloseq permet de calculer sur notre exemple
|
||||||
|
- En train de lire sur Principal coordinate analysis : https://openplantpathology.github.io/OPP_Workshop_Multivariate/2-MV_PCO.html
|
||||||
|
- Parametric t-SNE pour avoir une unique représentation latente (inconvénient utilise du Deep Learning)
|
||||||
|
- Lire Papier UniFrac
|
||||||
|
|
||||||
|
#### Réflexion
|
||||||
|
|
||||||
|
- easy16s : se renseigner sur
|
||||||
|
- $\alpha$, $\beta$ diversité
|
||||||
|
- Heatmap
|
||||||
|
- Regarder **SPARTA** Rennes
|
||||||
|
- Ecrire et étudier les modèles pour différents niveaux taxonomiques.
|
||||||
|
- 🆕 Regarder NetComi
|
||||||
|
- 🆕 Regarder OneNet car agrégation plus robuste
|
||||||
|
- 🆕 Réfléchir au sens d'agréger les données ou de les diviser
|
||||||
|
|
||||||
|
#### Écrire et faire tourner
|
||||||
|
|
||||||
|
- Lancer *colBiSBM* sur $OTU\times Sample$ → problème du chargement en mémoire des données à voir
|
||||||
|
- Lancer *colSBM* sur $OTU\times OTU$
|
||||||
|
- TabNet pratiquer les [exercices](https://github.com/cregouby/Tutoriel_torch)
|
||||||
|
- 🆕 SparCC à différents niveaux
|
||||||
|
- 🆕 SBM à différents niveaux
|
||||||
|
- 🆕⌛ Tree-PLN à différents niveaux
|
||||||
|
|
||||||
|
|
||||||
|
#### Causalité
|
||||||
|
|
||||||
|
Plus sur le temps long, à regarder
|
||||||
|
|
||||||
|
- GT causalité
|
||||||
|
- Daria Bystrova lire présentation @bystrovaCausalDiscovery (Meek rules, V-structure)
|
||||||
|
|
||||||
|
## A discuter
|
||||||
|
|
||||||
|
- 🆕 Voir pour des Réseaux / GDR où aller
|
||||||
|
- 🆕 Chercher des cours à suivre
|
||||||
|
|
||||||
|
## Biblio à faire
|
||||||
|
|
||||||
|
- Regarder Transport optimal sur graphes bipartites.
|
||||||
|
|
||||||
|
|
||||||
|
## Lectures en cours 📚
|
||||||
|
|
||||||
|
### HDR Vincent Brault
|
||||||
|
|
||||||
|
- ⌛ Chap 2 : Creuser l'idée de maximiser l'énergie libre, très intéressant regarder le critère CARI et lire Robert et al 2021. Actuellement p32 du manuscrit
|
||||||
|
- Chap 3
|
||||||
|
|
||||||
|
### OT
|
||||||
|
- ⌛ @mazeletUnsupervisedLearningOptimal Intéressant pour le transport optimal entre graphes de tailles différentes | Regarder si la régularisation entropique ne marche pas bien pour le graphe.
|
||||||
|
- ⌛ @nennaLecture2Entropic Pour comprendre le problème d'OT régularisé par l'entropie.
|
||||||
|
- ⌛ @nennaLecture1Monge
|
||||||
|
|
||||||
|
### Inférence de graphes
|
||||||
|
|
||||||
|
- ⌛ @aitchisonStatisticalAnalysisCompositional1982a, en cours
|
||||||
|
|
||||||
|
- ❗📖 @payneFiniteMixturesMultivariate2023 sur MixMPLN
|
||||||
|
|
||||||
|
### Causalité
|
||||||
|
|
||||||
|
- ❗📖 @bystrovaCausalDiscovery
|
||||||
|
|
||||||
|
### Largest Gaps
|
||||||
|
|
||||||
|
- ❗📖 @braultFastConsistentAlgorithm2023
|
||||||
|
- ❗📖 @channarondClassificationEstimationStochastic2012 le papier qui introduit le *Largest Gaps*
|
||||||
176
suivi/2026-7/references.bib
Normal file
|
|
@ -0,0 +1,176 @@
|
||||||
|
@article{mazeletUnsupervisedLearningOptimal,
|
||||||
|
title = {Unsupervised {{Learning}} for {{Optimal Transport}} Plan Prediction between Unbalanced Graphs},
|
||||||
|
author = {Mazelet, Sonia and Flamary, Rémi and Thirion, Bertrand},
|
||||||
|
abstract = {Optimal transport between graphs, based on Gromov-Wasserstein and other extensions, is a powerful tool for comparing and aligning graph structures. However, solving the associated non-convex optimization problems is computationally expensive, which limits the scalability of these methods to large graphs. In this work, we present Unbalanced Learning of Optimal Transport (ULOT), a deep learning method that predicts optimal transport plans between two graphs. Our method is trained by minimizing the fused unbalanced Gromov-Wasserstein (FUGW) loss. We propose a novel neural architecture with cross-attention that is conditioned on the FUGW tradeoff hyperparameters. We evaluate ULOT on synthetic stochastic block model (SBM) graphs and on real cortical surface data obtained from fMRI. ULOT predicts transport plans with competitive loss up to two orders of magnitude faster than classical solvers. Furthermore, the predicted plan can be used as a warm start for classical solvers to accelerate their convergence. Finally, the predicted transport plan is fully differentiable with respect to the graph inputs and FUGW hyperparameters, enabling the optimization of functionals of the ULOT plan.},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-11T09:08:09.864Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/HPZEYMM9/Mazelet et al. - Unsupervised Learning for Optimal Transport plan prediction between unbalanced graphs.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{peixotoHierarchicalBlockStructures2014,
|
||||||
|
title = {Hierarchical {{Block Structures}} and {{High-Resolution Model Selection}} in {{Large Networks}}},
|
||||||
|
author = {Peixoto, Tiago P.},
|
||||||
|
year = 2014,
|
||||||
|
month = mar,
|
||||||
|
journal = {Physical Review X},
|
||||||
|
volume = {4},
|
||||||
|
number = {1},
|
||||||
|
pages = {011047},
|
||||||
|
issn = {2160-3308},
|
||||||
|
doi = {10.1103/PhysRevX.4.011047},
|
||||||
|
urldate = {2025-09-26},
|
||||||
|
copyright = {http://creativecommons.org/licenses/by/3.0/},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-09-26T08:27:38.586Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/R58ZQK8J/Peixoto - 2014 - Hierarchical Block Structures and High-Resolution Model Selection in Large Networks.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@article{abramovStructureKnowsBest,
|
||||||
|
title = {Structure Knows Best: Predicting Ecological Interactions across Space through Pairwise Integration of Latent Network Patterns},
|
||||||
|
author = {Abramov, Kesem and Biton, Barry and Galai, Geut and Puzis, Rami and Pilosof, Shai},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-12-01T08:50:29.812Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/DX44VXQA/Abramov et al. - Structure knows best predicting ecological interactions across space through pairwise integration o.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@article{nennaLecture2Entropic,
|
||||||
|
title = {Lecture 2: {{Entropic Optimal Transport}}},
|
||||||
|
author = {Nenna, Luca},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-11T16:06:28.547Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/WGFIISDB/Nenna - Lecture 2 Entropic Optimal Transport.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{nennaLecture1Monge,
|
||||||
|
title = {Lecture 1 {{Monge}} and {{Kantorovich}} Problems: From Primal to Dual},
|
||||||
|
author = {Nenna, Luca},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-13T09:24:13.832Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/7LVQPD6D/Nenna - Lecture 1 Monge and Kantorovich problems from primal to dual.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{Morton2021.11.09.467939,
|
||||||
|
title = {Scalable Estimation of Microbial Co-Occurrence Networks with {{Variational Autoencoders}}},
|
||||||
|
author = {Morton, James T. and Silverman, Justin and Tikhonov, Gleb and Lähdesmäki, Harri and Bonneau, Rich},
|
||||||
|
date = {2021},
|
||||||
|
journaltitle = {bioRxiv : the preprint server for biology},
|
||||||
|
shortjournal = {bioRxiv},
|
||||||
|
eprint = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939.full.pdf},
|
||||||
|
publisher = {Cold Spring Harbor Laboratory},
|
||||||
|
doi = {10.1101/2021.11.09.467939},
|
||||||
|
url = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939},
|
||||||
|
abstract = {Estimating microbe-microbe interactions is critical for understanding the ecological laws governing microbial communities. Rapidly decreasing sequencing costs have promised new opportunities to estimate microbe-microbe interactions across thousands of uncultured, unknown microbes. However, typical microbiome datasets are very high dimensional and accurate estimation of microbial correlations requires tens of thousands of samples, exceeding the computational capabilities of existing methodologies. Furthermore, the vast majority of microbiome studies collect compositional metagenomics data which enforces a negative bias when computing microbe-microbe correlations. The Multinomial Logistic Normal (MLN) distribution has been shown to be effective at inferring microbe-microbe correlations, however scalable Bayesian inference of these distributions has remained elusive. Here, we show that carefully constructed Variational Autoencoders (VAEs) augmented with the Isometric Log-ratio (ILR) transform can estimate low-rank MLN distributions thousands of times faster than existing methods. These VAEs can be trained on tens of thousands of samples, enabling co-occurrence inference across tens of thousands of microbes without regularization. The latent embedding distances computed from these VAEs are competitive with existing beta-diversity methods across a variety of mouse and human microbiome classification and regression tasks, with notable improvements on longitudinal studies. Competing Interest Statement: The authors have declared no competing interest.},
|
||||||
|
elocation-id = {2021.11.09.467939},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-30T14:17:29.518Z}
|
||||||
|
}
|
||||||
|
@article{aitchisonStatisticalAnalysisCompositional1982a,
|
||||||
|
title = {The {{Statistical Analysis}} of {{Compositional Data}}},
|
||||||
|
author = {Aitchison, J.},
|
||||||
|
date = {1982},
|
||||||
|
journaltitle = {Journal of the Royal Statistical Society. Series B (Methodological)},
|
||||||
|
volume = {44},
|
||||||
|
number = {2},
|
||||||
|
eprint = {2345821},
|
||||||
|
eprinttype = {jstor},
|
||||||
|
pages = {139--177},
|
||||||
|
publisher = {[Royal Statistical Society, Oxford University Press]},
|
||||||
|
issn = {0035-9246},
|
||||||
|
url = {https://www.jstor.org/stable/2345821},
|
||||||
|
urldate = {2025-05-07},
|
||||||
|
abstract = {The simplex plays an important role as sample space in many practical situations where compositional data, in the form of proportions of some whole, require interpretation. It is argued that the statistical analysis of such data has proved difficult because of a lack both of concepts of independence and of rich enough parametric classes of distributions in the simplex. A variety of independence hypotheses are introduced and interrelated, and new classes of transformed-normal distributions in the simplex are provided as models within which the independence hypotheses can be tested through standard theory of parametric hypothesis testing. The new concepts and statistical methodology are illustrated by a number of applications.},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-05-07T07:43:38.485Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/S97URH4Y/Aitchison - 1982 - The Statistical Analysis of Compositional Data.pdf}
|
||||||
|
}
|
||||||
|
@online{payneFiniteMixturesMultivariate2023,
|
||||||
|
title = {Finite {{Mixtures}} of {{Multivariate Poisson-Log Normal Factor Analyzers}} for {{Clustering Count Data}}},
|
||||||
|
author = {Payne, Andrea and Silva, Anjali and Rothstein, Steven J. and McNicholas, Paul D. and Subedi, Sanjeena},
|
||||||
|
date = {2023-11-13},
|
||||||
|
eprint = {2311.07762},
|
||||||
|
eprinttype = {arXiv},
|
||||||
|
eprintclass = {stat},
|
||||||
|
doi = {10.48550/arXiv.2311.07762},
|
||||||
|
url = {http://arxiv.org/abs/2311.07762},
|
||||||
|
urldate = {2025-07-02},
|
||||||
|
abstract = {A mixture of multivariate Poisson-log normal factor analyzers is introduced by imposing constraints on the covariance matrix, which resulted in flexible models for clustering purposes. In particular, a class of eight parsimonious mixture models based on the mixtures of factor analyzers model are introduced. Variational Gaussian approximation is used for parameter estimation, and information criteria are used for model selection. The proposed models are explored in the context of clustering discrete data arising from RNA sequencing studies. Using real and simulated data, the models are shown to give favourable clustering performance. The GitHub R package for this work is available at https://github.com/anjalisilva/mixMPLNFA and is released under the open-source MIT license.},
|
||||||
|
pubstate = {prepublished},
|
||||||
|
keywords = {/unread,Statistics - Computation,Statistics - Machine Learning,Statistics - Methodology},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-02T09:31:47.579Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/BXVPEIDD/Payne et al. - 2023 - Finite Mixtures of Multivariate Poisson-Log Normal Factor Analyzers for Clustering Count Data.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/L5DAS5C2/2311.html}
|
||||||
|
}
|
||||||
|
@unpublished{bystrovaCausalDiscovery,
|
||||||
|
title = {Causal Discovery},
|
||||||
|
author = {Bystrova, Daria},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-02T09:34:39.476Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/NQE5DY92/Bystrova - Causal discovery.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@online{braultFastConsistentAlgorithm2023,
|
||||||
|
title = {Fast and {{Consistent Algorithm}} for the {{Latent Block Model}}},
|
||||||
|
author = {Brault, Vincent and Channarond, Antoine},
|
||||||
|
date = {2023-03-09},
|
||||||
|
eprint = {1610.09005},
|
||||||
|
eprinttype = {arXiv},
|
||||||
|
eprintclass = {math},
|
||||||
|
doi = {10.48550/arXiv.1610.09005},
|
||||||
|
url = {http://arxiv.org/abs/1610.09005},
|
||||||
|
urldate = {2025-07-09},
|
||||||
|
abstract = {The latent block model is used to simultaneously rank the rows and columns of a matrix to reveal a block structure. The algorithms used for estimation are often time consuming. However, recent work shows that the log-likelihood ratios are equivalent under the complete and observed (with unknown labels) models and the groups posterior distribution to converge as the size of the data increases to a Dirac mass located at the actual groups configuration. Based on these observations, the algorithm Largest Gaps is proposed in this paper to perform clustering using only the marginals of the matrix, when the number of blocks is very small with respect to the size of the whole matrix in the case of binary data. In addition, a model selection method is incorporated with a proof of its consistency. Thus, this paper shows that studying simplistic configurations (few blocks compared to the size of the matrix or very contrasting blocks) with complex algorithms is useless since the marginals already give very good parameter and classification estimates.},
|
||||||
|
langid = {english},
|
||||||
|
pubstate = {prepublished},
|
||||||
|
keywords = {/unread,Mathematics - Statistics Theory,Statistics - Computation,Statistics - Statistics Theory},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T13:58:53.533Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/5LXC6Y68/Brault et Channarond - 2023 - Fast and Consistent Algorithm for the Latent Block Model.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{braultGeneralisationLalgorithmeLargest,
|
||||||
|
title = {Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique},
|
||||||
|
author = {Brault, Vincent and Channarond, Antoine and Robert, Valérie},
|
||||||
|
abstract = {The latent block model assumes there exists a distribution for each crossing between an object cluster and a variable cluster of a data table ; the cells are supposed to be independent conditionally to the choice of these clusters. To estimate the model parameters, most of algorithms are time consuming. Brault and Channarond (2016) proposed to adapt the Largest Gaps algorithm which consists in using the margins. They thus obtained a procedure which estimates all the model parameters consistently but requires a large number of observations. In this talk, we will extend the procedure to the case of any distribution having a second order moment by using an EM algorithm estimation.},
|
||||||
|
langid = {french},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T12:29:43.098Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/GIUNC4L3/Brault et al. - Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{channarondClassificationEstimationStochastic2012,
|
||||||
|
title = {Classification and Estimation in the {{Stochastic Blockmodel}} Based on the Empirical Degrees},
|
||||||
|
author = {Channarond, Antoine and Daudin, Jean-Jacques and Robin, Stéphane},
|
||||||
|
date = {2012-01-01},
|
||||||
|
journaltitle = {Electronic Journal of Statistics},
|
||||||
|
shortjournal = {Electron. J. Statist.},
|
||||||
|
volume = {6},
|
||||||
|
publisher = {Institute of Mathematical Statistics},
|
||||||
|
issn = {1935-7524},
|
||||||
|
doi = {10.1214/12-ejs753},
|
||||||
|
url = {https://projecteuclid.org/journals/electronic-journal-of-statistics/volume-6/issue-none/Classification-and-estimation-in-the-Stochastic-Blockmodel-based-on-the/10.1214/12-EJS753.full},
|
||||||
|
urldate = {2025-07-09},
|
||||||
|
abstract = {The Stochastic Blockmodel [16] is a mixture model for heterogeneous network data. Unlike the usual statistical framework, new nodes give additional information about the previous ones in this model. Thereby the distribution of the degrees concentrates in points conditionally on the node class. We show under a mild assumption that classification, estimation and model selection can actually be achieved with no more than the empirical degree data. We provide an algorithm able to process very large networks and consistent estimators based on it. In particular, we prove a bound of the probability of misclassification of at least one node, including when the number of classes grows.},
|
||||||
|
issue = {none},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T13:59:33.921Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/8TL8AJ2G/Channarond et al. - 2012 - Classification and estimation in the Stochastic Blockmodel based on the empirical degrees.pdf}
|
||||||
|
}
|
||||||
130
suivi/2026-8/2026-8.qmd
Normal file
|
|
@ -0,0 +1,130 @@
|
||||||
|
---
|
||||||
|
title: "Bilan semaine 8 2026 : 16 février - 20 février"
|
||||||
|
categories: [colBiSBM, inférence, GNN]
|
||||||
|
date: 2026 02 13
|
||||||
|
date-modified: last-modified
|
||||||
|
bibliography: references.bib
|
||||||
|
# from: markdown+latex_macros
|
||||||
|
---
|
||||||
|
{{< include /_macros.tex >}}
|
||||||
|
|
||||||
|
## TODO List
|
||||||
|
|
||||||
|
- Petites opérations sur les OTUs (regarder la matrice dans les yeux):
|
||||||
|
- Ranger les OTUs par variances (i.e. `sd(OTU_j)`)
|
||||||
|
- HMC sur-dispersés (au-dessus bissectrice)
|
||||||
|
- Enterotype phyloseq sous-disp
|
||||||
|
- Regarder la proportion de 1. taxons rares, 2. zéros.
|
||||||
|
- Faire des coupures selon niveaux taxonomiques et regarder si $\Var_{\text{intra}} \approx \Var_{\text{inter}}$
|
||||||
|
- *Bonus*: faire ça dans qmd et voir si forge permet gitlab pages
|
||||||
|
|
||||||
|
|
||||||
|
- ✅ Avec blockmodels, codé un LBM-Séquentiel. *Des différences contrastées...*
|
||||||
|
- TODO Ajouter lien vers notebooks résultats
|
||||||
|
|
||||||
|
- Relire @peixotoHierarchicalBlockStructures2014
|
||||||
|
- Regarder les gens qui citent les travaux de Peixoto
|
||||||
|
|
||||||
|
- ⌛ **En cours** Implémentation `blockmodels` LBM avec covariables sur proportions (voir @eq-modele-covar-prop)
|
||||||
|
|
||||||
|
:::{.callout-note title="Idées"}
|
||||||
|
|
||||||
|
- Travailler sur Fungus Tree network
|
||||||
|
- ✅ **Demander à PB et SD**, ils regardent : Comparaison covar prop avec GREMLINS multipartite sur (log(dist_phylo), fungus-tree)
|
||||||
|
- Trouver manière de faire un compromis : $\ell(Y,Z,W;\theta) - \lambda d(C(W),C_0)$ avec $C(W)$ le clustering seulement sur la base de la structure LBM et $C_0$ le clustering de l'arbre. Problème : $d$ est une distance entre partitions, comment optimiser dessus ?
|
||||||
|
- ⌛ Mise à jour partielle des $\tau$ : ce qui pose souci, ce sont les gros calculs matriciels (est-ce vraiment vrai ?). Donc une sorte de "stochastic" VEM où on update seulement une partie des $\tau$ à chaque itération. Et échantillonnage stratifié selon l'arbre ?
|
||||||
|
- ⌛ Simulations avec $n_2$ croissant lancées sur Migale
|
||||||
|
- Réimplémenté VE Bernoulli dans colSBM pour Bipartite et début d'implémentation du Stochastic VE. En fait, le problème vient des calculs matriciels $Y\times(\tau^{(1)})^{\top}$ ($n_2^2$), donc besoin de sous-échantillonner les nœuds de l'autre dimension à mettre à jour.
|
||||||
|
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
|
- Clustering unipartite : j'ai cassé une fonction de distance, à vérifier et réparer
|
||||||
|
|
||||||
|
- Codes pour le papier :
|
||||||
|
- Nettoyer les scripts
|
||||||
|
- Faire un joli README
|
||||||
|
- ❓Faire des notebooks
|
||||||
|
|
||||||
|
- Réussir à reproduire résultat de @abramovStructureKnowsBest
|
||||||
|
|
||||||
|
- Maitriser graphtools de Peixoto pour essayer d'utiliser l'arbre taxonomique sur le graphe de cooccurrence inféré par SparCC
|
||||||
|
|
||||||
|
- Maitriser SparCC
|
||||||
|
|
||||||
|
- 👶 (délégué à Mona) Clustering sur Doré :
|
||||||
|
- Ajouter Chao1 et 2, colonne par colonne (site par site), et faire indice moyen et la variance.
|
||||||
|
|
||||||
|
### Inférence et microbes
|
||||||
|
|
||||||
|
- ✅ En préparation d'un fichier (réu avec JBL à 10h45 le 06/02/2026).
|
||||||
|
Possible en modifiant lbm.h et sbm.h d'obtenir un modèle utilisant les covariables de groupes (de blocs ?).
|
||||||
|
Car besoin de changer `membership.m_step()` pour mettre à jour $\pmb\pi$ et $\pmb{\rho}$ en utilisant les $\pmb B^{\top}\pmb X$
|
||||||
|
et en renvoyant l'ELBO adaptée.
|
||||||
|
- 😄 Avantage s'inscrit directement dans blockmodels et permet d'avoir toutes les lois d'émissions déjà codées et compatibles !
|
||||||
|
|
||||||
|
- 😢 Besoin de réfléchir à une bonne implémentation.
|
||||||
|
|
||||||
|
#### Bibliographie: à lire, à faire
|
||||||
|
|
||||||
|
- Lire article multi-niveaux Saint-Clair
|
||||||
|
- 🆕 🔎 Trouver des papiers:
|
||||||
|
- LBM Negative Binomial
|
||||||
|
- Network inference through sample comparison
|
||||||
|
|
||||||
|
- Idée des groupes sur la base de distance phylogénétique:
|
||||||
|
- En train de comprendre les distances que phyloseq permet de calculer sur notre exemple
|
||||||
|
- En train de lire sur Principal coordinate analysis : https://openplantpathology.github.io/OPP_Workshop_Multivariate/2-MV_PCO.html
|
||||||
|
- Parametric t-SNE pour avoir une unique représentation latente (inconvénient utilise du Deep Learning)
|
||||||
|
- Lire Papier UniFrac
|
||||||
|
|
||||||
|
|
||||||
|
#### Écrire et faire tourner
|
||||||
|
|
||||||
|
- 🆕 SparCC à différents niveaux
|
||||||
|
- 🆕⌛ Tree-PLN à différents niveaux
|
||||||
|
|
||||||
|
|
||||||
|
#### Causalité
|
||||||
|
|
||||||
|
Plus sur le temps long, à regarder
|
||||||
|
|
||||||
|
- GT causalité
|
||||||
|
- Daria Bystrova lire présentation @bystrovaCausalDiscovery (Meek rules, V-structure)
|
||||||
|
|
||||||
|
## A discuter
|
||||||
|
|
||||||
|
- 🆕 Voir pour des Réseaux / GDR où aller
|
||||||
|
- 🆕 Chercher des cours à suivre
|
||||||
|
|
||||||
|
## Biblio à faire
|
||||||
|
|
||||||
|
- Regarder Transport optimal graphes bipartite.
|
||||||
|
|
||||||
|
|
||||||
|
## Lectures en cours 📚
|
||||||
|
|
||||||
|
### HDR Vincent Brault
|
||||||
|
|
||||||
|
- ⌛ Chap 2 : Creuser l'idée de maximiser l'énergie libre, très intéressant regarder le critère CARI et lire Robert et al 2021. Actuellement p32 du manuscrit
|
||||||
|
- Chap 3
|
||||||
|
|
||||||
|
### OT
|
||||||
|
- ⌛ @mazeletUnsupervisedLearningOptimal Intéressant pour le transport optimal entre graphes de tailles différentes | Regarder si la régularisation entropique ne marche pas bien pour le graphe.
|
||||||
|
- ⌛ @nennaLecture2Entropic Pour comprendre le problème d'OT régularisé par l'entropie.
|
||||||
|
- ⌛ @nennaLecture1Monge
|
||||||
|
|
||||||
|
### Inférence de graphes
|
||||||
|
|
||||||
|
- ⌛ @aitchisonStatisticalAnalysisCompositional1982a, en cours
|
||||||
|
|
||||||
|
- ❗📖 @payneFiniteMixturesMultivariate2023 sur MixMPLN
|
||||||
|
|
||||||
|
### Causalité
|
||||||
|
|
||||||
|
- ❗📖 @bystrovaCausalDiscovery
|
||||||
|
|
||||||
|
### Largest Gaps
|
||||||
|
|
||||||
|
- ❗📖 @braultFastConsistentAlgorithm2023
|
||||||
|
- ❗📖 @channarondClassificationEstimationStochastic2012 le papier qui introduit le *Largest Gaps*
|
||||||
176
suivi/2026-8/references.bib
Normal file
|
|
@ -0,0 +1,176 @@
|
||||||
|
@article{mazeletUnsupervisedLearningOptimal,
|
||||||
|
title = {Unsupervised {{Learning}} for {{Optimal Transport}} Plan Prediction between Unbalanced Graphs},
|
||||||
|
author = {Mazelet, Sonia and Flamary, Rémi and Thirion, Bertrand},
|
||||||
|
abstract = {Optimal transport between graphs, based on Gromov-Wasserstein and other extensions, is a powerful tool for comparing and aligning graph structures. However, solving the associated non-convex optimization problems is computationally expensive, which limits the scalability of these methods to large graphs. In this work, we present Unbalanced Learning of Optimal Transport (ULOT), a deep learning method that predicts optimal transport plans between two graphs. Our method is trained by minimizing the fused unbalanced Gromov-Wasserstein (FUGW) loss. We propose a novel neural architecture with cross-attention that is conditioned on the FUGW tradeoff hyperparameters. We evaluate ULOT on synthetic stochastic block model (SBM) graphs and on real cortical surface data obtained from fMRI. ULOT predicts transport plans with competitive loss up to two orders of magnitude faster than classical solvers. Furthermore, the predicted plan can be used as a warm start for classical solvers to accelerate their convergence. Finally, the predicted transport plan is fully differentiable with respect to the graph inputs and FUGW hyperparameters, enabling the optimization of functionals of the ULOT plan.},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-11T09:08:09.864Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/HPZEYMM9/Mazelet et al. - Unsupervised Learning for Optimal Transport plan prediction between unbalanced graphs.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{peixotoHierarchicalBlockStructures2014,
|
||||||
|
title = {Hierarchical {{Block Structures}} and {{High-Resolution Model Selection}} in {{Large Networks}}},
|
||||||
|
author = {Peixoto, Tiago P.},
|
||||||
|
year = 2014,
|
||||||
|
month = mar,
|
||||||
|
journal = {Physical Review X},
|
||||||
|
volume = {4},
|
||||||
|
number = {1},
|
||||||
|
pages = {011047},
|
||||||
|
issn = {2160-3308},
|
||||||
|
doi = {10.1103/PhysRevX.4.011047},
|
||||||
|
urldate = {2025-09-26},
|
||||||
|
copyright = {http://creativecommons.org/licenses/by/3.0/},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-09-26T08:27:38.586Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/R58ZQK8J/Peixoto - 2014 - Hierarchical Block Structures and High-Resolution Model Selection in Large Networks.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@article{abramovStructureKnowsBest,
|
||||||
|
title = {Structure Knows Best: Predicting Ecological Interactions across Space through Pairwise Integration of Latent Network Patterns},
|
||||||
|
author = {Abramov, Kesem and Biton, Barry and Galai, Geut and Puzis, Rami and Pilosof, Shai},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-12-01T08:50:29.812Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/DX44VXQA/Abramov et al. - Structure knows best predicting ecological interactions across space through pairwise integration o.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@article{nennaLecture2Entropic,
|
||||||
|
title = {Lecture 2: {{Entropic Optimal Transport}}},
|
||||||
|
author = {Nenna, Luca},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-11T16:06:28.547Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/WGFIISDB/Nenna - Lecture 2 Entropic Optimal Transport.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{nennaLecture1Monge,
|
||||||
|
title = {Lecture 1 {{Monge}} and {{Kantorovich}} Problems: From Primal to Dual},
|
||||||
|
author = {Nenna, Luca},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-13T09:24:13.832Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/7LVQPD6D/Nenna - Lecture 1 Monge and Kantorovich problems from primal to dual.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{Morton2021.11.09.467939,
|
||||||
|
title = {Scalable Estimation of Microbial Co-Occurrence Networks with {{Variational Autoencoders}}},
|
||||||
|
author = {Morton, James T. and Silverman, Justin and Tikhonov, Gleb and Lähdesmäki, Harri and Bonneau, Rich},
|
||||||
|
date = {2021},
|
||||||
|
journaltitle = {bioRxiv : the preprint server for biology},
|
||||||
|
shortjournal = {bioRxiv},
|
||||||
|
eprint = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939.full.pdf},
|
||||||
|
publisher = {Cold Spring Harbor Laboratory},
|
||||||
|
doi = {10.1101/2021.11.09.467939},
|
||||||
|
url = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939},
|
||||||
|
abstract = {Estimating microbe-microbe interactions is critical for understanding the ecological laws governing microbial communities. Rapidly decreasing sequencing costs have promised new opportunities to estimate microbe-microbe interactions across thousands of uncultured, unknown microbes. However, typical microbiome datasets are very high dimensional and accurate estimation of microbial correlations requires tens of thousands of samples, exceeding the computational capabilities of existing methodologies. Furthermore, the vast majority of microbiome studies collect compositional metagenomics data which enforces a negative bias when computing microbe-microbe correlations. The Multinomial Logistic Normal (MLN) distribution has been shown to be effective at inferring microbe-microbe correlations, however scalable Bayesian inference of these distributions has remained elusive. Here, we show that carefully constructed Variational Autoencoders (VAEs) augmented with the Isometric Log-ratio (ILR) transform can estimate low-rank MLN distributions thousands of times faster than existing methods. These VAEs can be trained on tens of thousands of samples, enabling co-occurrence inference across tens of thousands of microbes without regularization. The latent embedding distances computed from these VAEs are competitive with existing beta-diversity methods across a variety of mouse and human microbiome classification and regression tasks, with notable improvements on longitudinal studies. Competing Interest Statement: The authors have declared no competing interest.},
|
||||||
|
elocation-id = {2021.11.09.467939},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-30T14:17:29.518Z}
|
||||||
|
}
|
||||||
|
@article{aitchisonStatisticalAnalysisCompositional1982a,
|
||||||
|
title = {The {{Statistical Analysis}} of {{Compositional Data}}},
|
||||||
|
author = {Aitchison, J.},
|
||||||
|
date = {1982},
|
||||||
|
journaltitle = {Journal of the Royal Statistical Society. Series B (Methodological)},
|
||||||
|
volume = {44},
|
||||||
|
number = {2},
|
||||||
|
eprint = {2345821},
|
||||||
|
eprinttype = {jstor},
|
||||||
|
pages = {139--177},
|
||||||
|
publisher = {[Royal Statistical Society, Oxford University Press]},
|
||||||
|
issn = {0035-9246},
|
||||||
|
url = {https://www.jstor.org/stable/2345821},
|
||||||
|
urldate = {2025-05-07},
|
||||||
|
abstract = {The simplex plays an important role as sample space in many practical situations where compositional data, in the form of proportions of some whole, require interpretation. It is argued that the statistical analysis of such data has proved difficult because of a lack both of concepts of independence and of rich enough parametric classes of distributions in the simplex. A variety of independence hypotheses are introduced and interrelated, and new classes of transformed-normal distributions in the simplex are provided as models within which the independence hypotheses can be tested through standard theory of parametric hypothesis testing. The new concepts and statistical methodology are illustrated by a number of applications.},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-05-07T07:43:38.485Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/S97URH4Y/Aitchison - 1982 - The Statistical Analysis of Compositional Data.pdf}
|
||||||
|
}
|
||||||
|
@online{payneFiniteMixturesMultivariate2023,
|
||||||
|
title = {Finite {{Mixtures}} of {{Multivariate Poisson-Log Normal Factor Analyzers}} for {{Clustering Count Data}}},
|
||||||
|
author = {Payne, Andrea and Silva, Anjali and Rothstein, Steven J. and McNicholas, Paul D. and Subedi, Sanjeena},
|
||||||
|
date = {2023-11-13},
|
||||||
|
eprint = {2311.07762},
|
||||||
|
eprinttype = {arXiv},
|
||||||
|
eprintclass = {stat},
|
||||||
|
doi = {10.48550/arXiv.2311.07762},
|
||||||
|
url = {http://arxiv.org/abs/2311.07762},
|
||||||
|
urldate = {2025-07-02},
|
||||||
|
abstract = {A mixture of multivariate Poisson-log normal factor analyzers is introduced by imposing constraints on the covariance matrix, which resulted in flexible models for clustering purposes. In particular, a class of eight parsimonious mixture models based on the mixtures of factor analyzers model are introduced. Variational Gaussian approximation is used for parameter estimation, and information criteria are used for model selection. The proposed models are explored in the context of clustering discrete data arising from RNA sequencing studies. Using real and simulated data, the models are shown to give favourable clustering performance. The GitHub R package for this work is available at https://github.com/anjalisilva/mixMPLNFA and is released under the open-source MIT license.},
|
||||||
|
pubstate = {prepublished},
|
||||||
|
keywords = {/unread,Statistics - Computation,Statistics - Machine Learning,Statistics - Methodology},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-02T09:31:47.579Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/BXVPEIDD/Payne et al. - 2023 - Finite Mixtures of Multivariate Poisson-Log Normal Factor Analyzers for Clustering Count Data.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/L5DAS5C2/2311.html}
|
||||||
|
}
|
||||||
|
@unpublished{bystrovaCausalDiscovery,
|
||||||
|
title = {Causal Discovery},
|
||||||
|
author = {Bystrova, Daria},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-02T09:34:39.476Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/NQE5DY92/Bystrova - Causal discovery.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@online{braultFastConsistentAlgorithm2023,
|
||||||
|
title = {Fast and {{Consistent Algorithm}} for the {{Latent Block Model}}},
|
||||||
|
author = {Brault, Vincent and Channarond, Antoine},
|
||||||
|
date = {2023-03-09},
|
||||||
|
eprint = {1610.09005},
|
||||||
|
eprinttype = {arXiv},
|
||||||
|
eprintclass = {math},
|
||||||
|
doi = {10.48550/arXiv.1610.09005},
|
||||||
|
url = {http://arxiv.org/abs/1610.09005},
|
||||||
|
urldate = {2025-07-09},
|
||||||
|
abstract = {The latent block model is used to simultaneously rank the rows and columns of a matrix to reveal a block structure. The algorithms used for estimation are often time consuming. However, recent work shows that the log-likelihood ratios are equivalent under the complete and observed (with unknown labels) models and the groups posterior distribution to converge as the size of the data increases to a Dirac mass located at the actual groups configuration. Based on these observations, the algorithm Largest Gaps is proposed in this paper to perform clustering using only the marginals of the matrix, when the number of blocks is very small with respect to the size of the whole matrix in the case of binary data. In addition, a model selection method is incorporated with a proof of its consistency. Thus, this paper shows that studying simplistic configurations (few blocks compared to the size of the matrix or very contrasting blocks) with complex algorithms is useless since the marginals already give very good parameter and classification estimates.},
|
||||||
|
langid = {english},
|
||||||
|
pubstate = {prepublished},
|
||||||
|
keywords = {/unread,Mathematics - Statistics Theory,Statistics - Computation,Statistics - Statistics Theory},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T13:58:53.533Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/5LXC6Y68/Brault et Channarond - 2023 - Fast and Consistent Algorithm for the Latent Block Model.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{braultGeneralisationLalgorithmeLargest,
|
||||||
|
title = {Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique},
|
||||||
|
author = {Brault, Vincent and Channarond, Antoine and Robert, Valérie},
|
||||||
|
abstract = {The latent block model assumes there exists a distribution for each crossing between an object cluster and a variable cluster of a data table ; the cells are supposed to be independent conditionally to the choice of these clusters. To estimate the model parameters, most of algorithms are time consuming. Brault and Channarond (2016) proposed to adapt the Largest Gaps algorithm which consists in using the margins. They thus obtained a procedure which estimates all the model parameters consistently but requires a large number of observations. In this talk, we will extend the procedure to the case of any distribution having a second order moment by using an EM algorithm estimation.},
|
||||||
|
langid = {french},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T12:29:43.098Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/GIUNC4L3/Brault et al. - Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{channarondClassificationEstimationStochastic2012,
|
||||||
|
title = {Classification and Estimation in the {{Stochastic Blockmodel}} Based on the Empirical Degrees},
|
||||||
|
author = {Channarond, Antoine and Daudin, Jean-Jacques and Robin, Stéphane},
|
||||||
|
date = {2012-01-01},
|
||||||
|
journaltitle = {Electronic Journal of Statistics},
|
||||||
|
shortjournal = {Electron. J. Statist.},
|
||||||
|
volume = {6},
|
||||||
|
publisher = {Institute of Mathematical Statistics},
|
||||||
|
issn = {1935-7524},
|
||||||
|
doi = {10.1214/12-ejs753},
|
||||||
|
url = {https://projecteuclid.org/journals/electronic-journal-of-statistics/volume-6/issue-none/Classification-and-estimation-in-the-Stochastic-Blockmodel-based-on-the/10.1214/12-EJS753.full},
|
||||||
|
urldate = {2025-07-09},
|
||||||
|
abstract = {The Stochastic Blockmodel [16] is a mixture model for heterogeneous network data. Unlike the usual statistical framework, new nodes give additional information about the previous ones in this model. Thereby the distribution of the degrees concentrates in points conditionally on the node class. We show under a mild assumption that classification, estimation and model selection can actually be achieved with no more than the empirical degree data. We provide an algorithm able to process very large networks and consistent estimators based on it. In particular, we prove a bound of the probability of misclassification of at least one node, including when the number of classes grows.},
|
||||||
|
issue = {none},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T13:59:33.921Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/8TL8AJ2G/Channarond et al. - 2012 - Classification and estimation in the Stochastic Blockmodel based on the empirical degrees.pdf}
|
||||||
|
}
|
||||||
133
suivi/2026-9/2026-9.qmd
Normal file
|
|
@ -0,0 +1,133 @@
|
||||||
|
---
|
||||||
|
title: "Bilan semaine 9 2026 : 23 février - 27 février"
|
||||||
|
categories: [colBiSBM, inférence, GNN]
|
||||||
|
date: 2026 02 23
|
||||||
|
date-modified: last-modified
|
||||||
|
bibliography: references.bib
|
||||||
|
# from: markdown+latex_macros
|
||||||
|
---
|
||||||
|
{{< include /_macros.tex >}}
|
||||||
|
|
||||||
|
## TODO List
|
||||||
|
|
||||||
|
- Petites opérations sur les OTUs (regarder la matrice dans les yeux):
|
||||||
|
- Ranger les OTUs par variances (i.e. `sd(OTU_j)`)
|
||||||
|
- HMC sur-dispersés (au-dessus bissectrice)
|
||||||
|
- Enterotype phyloseq sous-disp
|
||||||
|
- Regarder la proportion de 1. taxons rares, 2. zéros.
|
||||||
|
- Faire des coupures selon niveaux taxonomiques et regarder si $\Var_{\text{intra}} \approx \Var_{\text{inter}}$
|
||||||
|
- *Bonus*: faire ça dans qmd et voir si forge permet gitlab pages
|
||||||
|
|
||||||
|
|
||||||
|
- ✅ Avec blockmodels, codé un LBM-Séquentiel. *Des différences contrastées...*
|
||||||
|
- [Lien vers l'application du LBM séquentiel sur les données de Chaillou](analysis_benchmark_lbm_seq.html)
|
||||||
|
|
||||||
|
- Relire @peixotoHierarchicalBlockStructures2014
|
||||||
|
- Regarder les gens qui citent les travaux de Peixoto
|
||||||
|
- Utiliser graphtools en initialisant la recherche Nested avec le partitionnement donné par l'arbre phylogénétique.
|
||||||
|
|
||||||
|
- ⌛ **En cours** Implémentation `blockmodels` LBM avec covariables sur proportions (voir @eq-modele-covar-prop)
|
||||||
|
|
||||||
|
:::{.callout-note title="Idées"}
|
||||||
|
|
||||||
|
- Trouver manière de faire un compromis : $\ell(Y,Z,W;\theta) - \lambda d(C(W),C_0)$ avec $C(W)$ le clustering seulement sur la base de la structure LBM et $C_0$ le clustering de l'arbre. Problème : $d$ est une distance entre partitions, comment optimiser dessus ?
|
||||||
|
- ⌛ Mise à jour partielle des $\tau$ : ce qui pose souci, ce sont les gros calculs matriciels (est-ce vraiment vrai ?). Donc une sorte de "stochastic" VEM où on update seulement une partie des $\tau$ à chaque itération. Et échantillonnage stratifié selon l'arbre ?
|
||||||
|
- ⌛ Simulations avec $n_2$ croissant lancées sur Migale
|
||||||
|
- Réimplémenté VE Bernoulli dans colSBM pour Bipartite et début d'implémentation du Stochastic VE. En fait, le problème vient des calculs matriciels $Y\times(\tau^{(1)})^{\top}$ ($n_2^2$), donc besoin de sous-échantillonner les nœuds de l'autre dimension à mettre à jour.
|
||||||
|
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
|
- Clustering unipartite : j'ai cassé une fonction de distance, à vérifier et réparer
|
||||||
|
|
||||||
|
- Codes pour le papier :
|
||||||
|
- Nettoyer les scripts
|
||||||
|
- Faire un joli README
|
||||||
|
- ❓Faire des notebooks
|
||||||
|
|
||||||
|
- Réussir à reproduire résultat de @abramovStructureKnowsBest
|
||||||
|
|
||||||
|
- Maitriser graphtools de Peixoto pour essayer d'utiliser l'arbre taxonomique sur le graphe de cooccurrence inféré par SparCC
|
||||||
|
|
||||||
|
- Maitriser SparCC
|
||||||
|
|
||||||
|
- 👶 (délégué à Mona) Clustering sur Doré :
|
||||||
|
- Ajouter Chao1 et 2, colonne par colonne (site par site), et faire indice moyen et la variance.
|
||||||
|
|
||||||
|
### Inférence et microbes
|
||||||
|
|
||||||
|
- ⌛ (En cours) Possible en modifiant lbm.h et sbm.h d'obtenir un modèle utilisant les covariables de groupes (de blocs ?).
|
||||||
|
Car besoin de changer `membership.m_step()` pour mettre à jour $\pmb\pi$ et $\pmb{\rho}$ en utilisant les $\pmb B^{\top}\pmb X$
|
||||||
|
et en renvoyant l'ELBO adaptée.
|
||||||
|
- 😄 Avantage s'inscrit directement dans blockmodels et permet d'avoir toutes les lois d'émissions déjà codées et compatibles !
|
||||||
|
|
||||||
|
- 😢 Besoin de réfléchir à une bonne implémentation.
|
||||||
|
|
||||||
|
J'ai codé l'optimisation et les transferts mais il faut que je vérifie que tout fonctionne
|
||||||
|
|
||||||
|
- ✅ Appliqué multipartite sur $\forall i, OTU_i \times Sample$:
|
||||||
|

|
||||||
|
|
||||||
|
#### Bibliographie: à lire, à faire
|
||||||
|
|
||||||
|
- Lire article multi-niveaux Saint-Clair
|
||||||
|
- 🆕 🔎 Trouver des papiers:
|
||||||
|
- LBM Negative Binomial
|
||||||
|
- Network inference through sample comparison
|
||||||
|
|
||||||
|
- Idée des groupes sur la base de distance phylogénétique:
|
||||||
|
- En train de comprendre les distances que phyloseq permet de calculer sur notre exemple
|
||||||
|
- En train de lire sur Principal coordinate analysis : https://openplantpathology.github.io/OPP_Workshop_Multivariate/2-MV_PCO.html
|
||||||
|
- Parametric t-SNE pour avoir une unique représentation latente (inconvénient utilise du Deep Learning)
|
||||||
|
- Lire Papier UniFrac
|
||||||
|
|
||||||
|
|
||||||
|
#### Écrire et faire tourner
|
||||||
|
|
||||||
|
- 🆕 SparCC à différents niveaux
|
||||||
|
- 🆕⌛ Tree-PLN à différents niveaux
|
||||||
|
|
||||||
|
|
||||||
|
#### Causalité
|
||||||
|
|
||||||
|
Plus sur le temps long, à regarder
|
||||||
|
|
||||||
|
- GT causalité
|
||||||
|
- Daria Bystrova lire présentation @bystrovaCausalDiscovery (Meek rules, V-structure)
|
||||||
|
|
||||||
|
## A discuter
|
||||||
|
|
||||||
|
- 🆕 Voir pour des Réseaux / GDR où aller
|
||||||
|
- 🆕 Chercher des cours à suivre
|
||||||
|
|
||||||
|
## Biblio à faire
|
||||||
|
|
||||||
|
- Regarder Transport optimal graphes bipartite.
|
||||||
|
|
||||||
|
|
||||||
|
## Lectures en cours 📚
|
||||||
|
|
||||||
|
### HDR Vincent Brault
|
||||||
|
|
||||||
|
- ⌛ Chap 2 : Creuser l'idée de maximiser l'énergie libre, très intéressant regarder le critère CARI et lire Robert et al 2021. Actuellement p32 du manuscrit
|
||||||
|
- Chap 3
|
||||||
|
|
||||||
|
### OT
|
||||||
|
- ⌛ @mazeletUnsupervisedLearningOptimal Intéressant pour le transport optimal entre graphes de tailles différentes | Regarder si la régularisation entropique ne marche pas bien pour le graphe.
|
||||||
|
- ⌛ @nennaLecture2Entropic Pour comprendre le problème d'OT régularisé par l'entropie.
|
||||||
|
- ⌛ @nennaLecture1Monge
|
||||||
|
|
||||||
|
### Inférence de graphes
|
||||||
|
|
||||||
|
- ⌛ @aitchisonStatisticalAnalysisCompositional1982a, en cours
|
||||||
|
|
||||||
|
- ❗📖 @payneFiniteMixturesMultivariate2023 sur MixMPLN
|
||||||
|
|
||||||
|
### Causalité
|
||||||
|
|
||||||
|
- ❗📖 @bystrovaCausalDiscovery
|
||||||
|
|
||||||
|
### Largest Gaps
|
||||||
|
|
||||||
|
- ❗📖 @braultFastConsistentAlgorithm2023
|
||||||
|
- ❗📖 @channarondClassificationEstimationStochastic2012 le papier qui introduit le *Largest Gaps*
|
||||||
4083
suivi/2026-9/analysis_benchmark_lbm_seq.html
Normal file
50783
suivi/2026-9/figs/Multipartite.svg
Normal file
|
After Width: | Height: | Size: 7.5 MiB |
176
suivi/2026-9/references.bib
Normal file
|
|
@ -0,0 +1,176 @@
|
||||||
|
@article{mazeletUnsupervisedLearningOptimal,
|
||||||
|
title = {Unsupervised {{Learning}} for {{Optimal Transport}} Plan Prediction between Unbalanced Graphs},
|
||||||
|
author = {Mazelet, Sonia and Flamary, Rémi and Thirion, Bertrand},
|
||||||
|
abstract = {Optimal transport between graphs, based on Gromov-Wasserstein and other extensions, is a powerful tool for comparing and aligning graph structures. However, solving the associated non-convex optimization problems is computationally expensive, which limits the scalability of these methods to large graphs. In this work, we present Unbalanced Learning of Optimal Transport (ULOT), a deep learning method that predicts optimal transport plans between two graphs. Our method is trained by minimizing the fused unbalanced Gromov-Wasserstein (FUGW) loss. We propose a novel neural architecture with cross-attention that is conditioned on the FUGW tradeoff hyperparameters. We evaluate ULOT on synthetic stochastic block model (SBM) graphs and on real cortical surface data obtained from fMRI. ULOT predicts transport plans with competitive loss up to two orders of magnitude faster than classical solvers. Furthermore, the predicted plan can be used as a warm start for classical solvers to accelerate their convergence. Finally, the predicted transport plan is fully differentiable with respect to the graph inputs and FUGW hyperparameters, enabling the optimization of functionals of the ULOT plan.},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-11T09:08:09.864Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/HPZEYMM9/Mazelet et al. - Unsupervised Learning for Optimal Transport plan prediction between unbalanced graphs.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{peixotoHierarchicalBlockStructures2014,
|
||||||
|
title = {Hierarchical {{Block Structures}} and {{High-Resolution Model Selection}} in {{Large Networks}}},
|
||||||
|
author = {Peixoto, Tiago P.},
|
||||||
|
year = 2014,
|
||||||
|
month = mar,
|
||||||
|
journal = {Physical Review X},
|
||||||
|
volume = {4},
|
||||||
|
number = {1},
|
||||||
|
pages = {011047},
|
||||||
|
issn = {2160-3308},
|
||||||
|
doi = {10.1103/PhysRevX.4.011047},
|
||||||
|
urldate = {2025-09-26},
|
||||||
|
copyright = {http://creativecommons.org/licenses/by/3.0/},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-09-26T08:27:38.586Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/R58ZQK8J/Peixoto - 2014 - Hierarchical Block Structures and High-Resolution Model Selection in Large Networks.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@article{abramovStructureKnowsBest,
|
||||||
|
title = {Structure Knows Best: Predicting Ecological Interactions across Space through Pairwise Integration of Latent Network Patterns},
|
||||||
|
author = {Abramov, Kesem and Biton, Barry and Galai, Geut and Puzis, Rami and Pilosof, Shai},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-12-01T08:50:29.812Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/DX44VXQA/Abramov et al. - Structure knows best predicting ecological interactions across space through pairwise integration o.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@article{nennaLecture2Entropic,
|
||||||
|
title = {Lecture 2: {{Entropic Optimal Transport}}},
|
||||||
|
author = {Nenna, Luca},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-11T16:06:28.547Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/WGFIISDB/Nenna - Lecture 2 Entropic Optimal Transport.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{nennaLecture1Monge,
|
||||||
|
title = {Lecture 1 {{Monge}} and {{Kantorovich}} Problems: From Primal to Dual},
|
||||||
|
author = {Nenna, Luca},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-13T09:24:13.832Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/7LVQPD6D/Nenna - Lecture 1 Monge and Kantorovich problems from primal to dual.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{Morton2021.11.09.467939,
|
||||||
|
title = {Scalable Estimation of Microbial Co-Occurrence Networks with {{Variational Autoencoders}}},
|
||||||
|
author = {Morton, James T. and Silverman, Justin and Tikhonov, Gleb and Lähdesmäki, Harri and Bonneau, Rich},
|
||||||
|
date = {2021},
|
||||||
|
journaltitle = {bioRxiv : the preprint server for biology},
|
||||||
|
shortjournal = {bioRxiv},
|
||||||
|
eprint = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939.full.pdf},
|
||||||
|
publisher = {Cold Spring Harbor Laboratory},
|
||||||
|
doi = {10.1101/2021.11.09.467939},
|
||||||
|
url = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939},
|
||||||
|
abstract = {Estimating microbe-microbe interactions is critical for understanding the ecological laws governing microbial communities. Rapidly decreasing sequencing costs have promised new opportunities to estimate microbe-microbe interactions across thousands of uncultured, unknown microbes. However, typical microbiome datasets are very high dimensional and accurate estimation of microbial correlations requires tens of thousands of samples, exceeding the computational capabilities of existing methodologies. Furthermore, the vast majority of microbiome studies collect compositional metagenomics data which enforces a negative bias when computing microbe-microbe correlations. The Multinomial Logistic Normal (MLN) distribution has been shown to be effective at inferring microbe-microbe correlations, however scalable Bayesian inference of these distributions has remained elusive. Here, we show that carefully constructed Variational Autoencoders (VAEs) augmented with the Isometric Log-ratio (ILR) transform can estimate low-rank MLN distributions thousands of times faster than existing methods. These VAEs can be trained on tens of thousands of samples, enabling co-occurrence inference across tens of thousands of microbes without regularization. The latent embedding distances computed from these VAEs are competitive with existing beta-diversity methods across a variety of mouse and human microbiome classification and regression tasks, with notable improvements on longitudinal studies. Competing Interest Statement: The authors have declared no competing interest.},
|
||||||
|
elocation-id = {2021.11.09.467939},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-06-30T14:17:29.518Z}
|
||||||
|
}
|
||||||
|
@article{aitchisonStatisticalAnalysisCompositional1982a,
|
||||||
|
title = {The {{Statistical Analysis}} of {{Compositional Data}}},
|
||||||
|
author = {Aitchison, J.},
|
||||||
|
date = {1982},
|
||||||
|
journaltitle = {Journal of the Royal Statistical Society. Series B (Methodological)},
|
||||||
|
volume = {44},
|
||||||
|
number = {2},
|
||||||
|
eprint = {2345821},
|
||||||
|
eprinttype = {jstor},
|
||||||
|
pages = {139--177},
|
||||||
|
publisher = {[Royal Statistical Society, Oxford University Press]},
|
||||||
|
issn = {0035-9246},
|
||||||
|
url = {https://www.jstor.org/stable/2345821},
|
||||||
|
urldate = {2025-05-07},
|
||||||
|
abstract = {The simplex plays an important role as sample space in many practical situations where compositional data, in the form of proportions of some whole, require interpretation. It is argued that the statistical analysis of such data has proved difficult because of a lack both of concepts of independence and of rich enough parametric classes of distributions in the simplex. A variety of independence hypotheses are introduced and interrelated, and new classes of transformed-normal distributions in the simplex are provided as models within which the independence hypotheses can be tested through standard theory of parametric hypothesis testing. The new concepts and statistical methodology are illustrated by a number of applications.},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-05-07T07:43:38.485Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/S97URH4Y/Aitchison - 1982 - The Statistical Analysis of Compositional Data.pdf}
|
||||||
|
}
|
||||||
|
@online{payneFiniteMixturesMultivariate2023,
|
||||||
|
title = {Finite {{Mixtures}} of {{Multivariate Poisson-Log Normal Factor Analyzers}} for {{Clustering Count Data}}},
|
||||||
|
author = {Payne, Andrea and Silva, Anjali and Rothstein, Steven J. and McNicholas, Paul D. and Subedi, Sanjeena},
|
||||||
|
date = {2023-11-13},
|
||||||
|
eprint = {2311.07762},
|
||||||
|
eprinttype = {arXiv},
|
||||||
|
eprintclass = {stat},
|
||||||
|
doi = {10.48550/arXiv.2311.07762},
|
||||||
|
url = {http://arxiv.org/abs/2311.07762},
|
||||||
|
urldate = {2025-07-02},
|
||||||
|
abstract = {A mixture of multivariate Poisson-log normal factor analyzers is introduced by imposing constraints on the covariance matrix, which resulted in flexible models for clustering purposes. In particular, a class of eight parsimonious mixture models based on the mixtures of factor analyzers model are introduced. Variational Gaussian approximation is used for parameter estimation, and information criteria are used for model selection. The proposed models are explored in the context of clustering discrete data arising from RNA sequencing studies. Using real and simulated data, the models are shown to give favourable clustering performance. The GitHub R package for this work is available at https://github.com/anjalisilva/mixMPLNFA and is released under the open-source MIT license.},
|
||||||
|
pubstate = {prepublished},
|
||||||
|
keywords = {/unread,Statistics - Computation,Statistics - Machine Learning,Statistics - Methodology},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-02T09:31:47.579Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/BXVPEIDD/Payne et al. - 2023 - Finite Mixtures of Multivariate Poisson-Log Normal Factor Analyzers for Clustering Count Data.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/L5DAS5C2/2311.html}
|
||||||
|
}
|
||||||
|
@unpublished{bystrovaCausalDiscovery,
|
||||||
|
title = {Causal Discovery},
|
||||||
|
author = {Bystrova, Daria},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-02T09:34:39.476Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/NQE5DY92/Bystrova - Causal discovery.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@online{braultFastConsistentAlgorithm2023,
|
||||||
|
title = {Fast and {{Consistent Algorithm}} for the {{Latent Block Model}}},
|
||||||
|
author = {Brault, Vincent and Channarond, Antoine},
|
||||||
|
date = {2023-03-09},
|
||||||
|
eprint = {1610.09005},
|
||||||
|
eprinttype = {arXiv},
|
||||||
|
eprintclass = {math},
|
||||||
|
doi = {10.48550/arXiv.1610.09005},
|
||||||
|
url = {http://arxiv.org/abs/1610.09005},
|
||||||
|
urldate = {2025-07-09},
|
||||||
|
abstract = {The latent block model is used to simultaneously rank the rows and columns of a matrix to reveal a block structure. The algorithms used for estimation are often time consuming. However, recent work shows that the log-likelihood ratios are equivalent under the complete and observed (with unknown labels) models and the groups posterior distribution to converge as the size of the data increases to a Dirac mass located at the actual groups configuration. Based on these observations, the algorithm Largest Gaps is proposed in this paper to perform clustering using only the marginals of the matrix, when the number of blocks is very small with respect to the size of the whole matrix in the case of binary data. In addition, a model selection method is incorporated with a proof of its consistency. Thus, this paper shows that studying simplistic configurations (few blocks compared to the size of the matrix or very contrasting blocks) with complex algorithms is useless since the marginals already give very good parameter and classification estimates.},
|
||||||
|
langid = {english},
|
||||||
|
pubstate = {prepublished},
|
||||||
|
keywords = {/unread,Mathematics - Statistics Theory,Statistics - Computation,Statistics - Statistics Theory},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T13:58:53.533Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/5LXC6Y68/Brault et Channarond - 2023 - Fast and Consistent Algorithm for the Latent Block Model.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{braultGeneralisationLalgorithmeLargest,
|
||||||
|
title = {Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique},
|
||||||
|
author = {Brault, Vincent and Channarond, Antoine and Robert, Valérie},
|
||||||
|
abstract = {The latent block model assumes there exists a distribution for each crossing between an object cluster and a variable cluster of a data table ; the cells are supposed to be independent conditionally to the choice of these clusters. To estimate the model parameters, most of algorithms are time consuming. Brault and Channarond (2016) proposed to adapt the Largest Gaps algorithm which consists in using the margins. They thus obtained a procedure which estimates all the model parameters consistently but requires a large number of observations. In this talk, we will extend the procedure to the case of any distribution having a second order moment by using an EM algorithm estimation.},
|
||||||
|
langid = {french},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T12:29:43.098Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/GIUNC4L3/Brault et al. - Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{channarondClassificationEstimationStochastic2012,
|
||||||
|
title = {Classification and Estimation in the {{Stochastic Blockmodel}} Based on the Empirical Degrees},
|
||||||
|
author = {Channarond, Antoine and Daudin, Jean-Jacques and Robin, Stéphane},
|
||||||
|
date = {2012-01-01},
|
||||||
|
journaltitle = {Electronic Journal of Statistics},
|
||||||
|
shortjournal = {Electron. J. Statist.},
|
||||||
|
volume = {6},
|
||||||
|
publisher = {Institute of Mathematical Statistics},
|
||||||
|
issn = {1935-7524},
|
||||||
|
doi = {10.1214/12-ejs753},
|
||||||
|
url = {https://projecteuclid.org/journals/electronic-journal-of-statistics/volume-6/issue-none/Classification-and-estimation-in-the-Stochastic-Blockmodel-based-on-the/10.1214/12-EJS753.full},
|
||||||
|
urldate = {2025-07-09},
|
||||||
|
abstract = {The Stochastic Blockmodel [16] is a mixture model for heterogeneous network data. Unlike the usual statistical framework, new nodes give additional information about the previous ones in this model. Thereby the distribution of the degrees concentrates in points conditionally on the node class. We show under a mild assumption that classification, estimation and model selection can actually be achieved with no more than the empirical degree data. We provide an algorithm able to process very large networks and consistent estimators based on it. In particular, we prove a bound of the probability of misclassification of at least one node, including when the number of classes grows.},
|
||||||
|
issue = {none},
|
||||||
|
langid = {english},
|
||||||
|
keywords = {/unread},
|
||||||
|
annotation = {Read\_Status: New\\
|
||||||
|
Read\_Status\_Date: 2025-07-09T13:59:33.921Z},
|
||||||
|
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/8TL8AJ2G/Channarond et al. - 2012 - Classification and estimation in the Stochastic Blockmodel based on the empirical degrees.pdf}
|
||||||
|
}
|
||||||
|
|
@ -3,11 +3,16 @@
|
||||||
# re-render posts only when a change to the source file is made ----
|
# re-render posts only when a change to the source file is made ----
|
||||||
freeze: auto
|
freeze: auto
|
||||||
|
|
||||||
author:
|
|
||||||
name: Louis Lacoste
|
|
||||||
email: louis.lacoste@agroparistech.fr
|
|
||||||
affiliation: MIA Paris-Saclay, INRAE, AgroParisTech, Université Paris-Saclay
|
|
||||||
orcid: 0009-0004-0178-9821
|
|
||||||
|
|
||||||
# enable banner style title blocks ----
|
# enable banner style title blocks ----
|
||||||
title-block-banner: true
|
title-block-banner: true
|
||||||
|
|
||||||
|
crossref:
|
||||||
|
custom:
|
||||||
|
- kind: float
|
||||||
|
reference-prefix: Item
|
||||||
|
key: item
|
||||||
|
- kind: float
|
||||||
|
reference-prefix: Reference
|
||||||
|
key: ref
|
||||||
|
|
||||||
|
lightbox: true
|
||||||
18
template.qmd
|
|
@ -1,18 +0,0 @@
|
||||||
---
|
|
||||||
title: "Bilan semaine MM YYYY : dd-dd mois"
|
|
||||||
format:
|
|
||||||
html:
|
|
||||||
embed-resources: true
|
|
||||||
---
|
|
||||||
|
|
||||||
## A faire
|
|
||||||
|
|
||||||
-
|
|
||||||
|
|
||||||
## J'ai fait
|
|
||||||
|
|
||||||
-
|
|
||||||
|
|
||||||
## A continuer
|
|
||||||
|
|
||||||
-
|
|
||||||