diff --git a/suivi/2025-28/2025-28.qmd b/suivi/2025-28/2025-28.qmd
index 4b66ea2..88fb6e8 100644
--- a/suivi/2025-28/2025-28.qmd
+++ b/suivi/2025-28/2025-28.qmd
@@ -111,6 +111,12 @@ OT, comparaison clustering, adaption ARI, *Largest Gap*
 
 - ❗📖 @bystrovaCausalDiscovery
 
+### Largest Gaps
+
+- ✅ @braultGeneralisationLalgorithmeLargest petit résumé de l'algo de @braultFastConsistentAlgorithm2023
+- ❗📖 @braultFastConsistentAlgorithm2023
+- ❗📖 @channarondClassificationEstimationStochastic2012 le papier qui introduit le *Largest Gaps*
+
 ## A discuter
 
 ### Congés P&S
diff --git a/suivi/2025-28/references.bib b/suivi/2025-28/references.bib
index 35916f5..1de96d9 100644
--- a/suivi/2025-28/references.bib
+++ b/suivi/2025-28/references.bib
@@ -91,3 +91,56 @@
   Read\_Status\_Date: 2025-07-02T09:34:39.476Z},
   file = {/home/louis/snap/zotero-snap/common/Zotero/storage/NQE5DY92/Bystrova - Causal discovery.pdf}
 }
+
+@online{braultFastConsistentAlgorithm2023,
+  title = {Fast and {{Consistent Algorithm}} for the {{Latent Block Model}}},
+  author = {Brault, Vincent and Channarond, Antoine},
+  date = {2023-03-09},
+  eprint = {1610.09005},
+  eprinttype = {arXiv},
+  eprintclass = {math},
+  doi = {10.48550/arXiv.1610.09005},
+  url = {http://arxiv.org/abs/1610.09005},
+  urldate = {2025-07-09},
+  abstract = {The latent block model is used to simultaneously rank the rows and columns of a matrix to reveal a block structure. The algorithms used for estimation are often time consuming. However, recent work shows that the log-likelihood ratios are equivalent under the complete and observed (with unknown labels) models and the groups posterior distribution to converge as the size of the data increases to a Dirac mass located at the actual groups configuration. Based on these observations, the algorithm Largest Gaps is proposed in this paper to perform clustering using only the marginals of the matrix, when the number of blocks is very small with respect to the size of the whole matrix in the case of binary data. In addition, a model selection method is incorporated with a proof of its consistency. Thus, this paper shows that studying simplistic configurations (few blocks compared to the size of the matrix or very contrasting blocks) with complex algorithms is useless since the marginals already give very good parameter and classification estimates.},
+  langid = {english},
+  pubstate = {prepublished},
+  keywords = {/unread,Mathematics - Statistics Theory,Statistics - Computation,Statistics - Statistics Theory},
+  annotation = {Read\_Status: New\\
+  Read\_Status\_Date: 2025-07-09T13:58:53.533Z},
+  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/5LXC6Y68/Brault et Channarond - 2023 - Fast and Consistent Algorithm for the Latent Block Model.pdf}
+}
+
+% NOTE(review): the abstract below describes a talk and no journal is recorded, so the entry is typed as unpublished rather than article.
+% TODO: add the date once confirmed; if the talk appeared in conference proceedings, switch to inproceedings and add booktitle.
+@unpublished{braultGeneralisationLalgorithmeLargest,
+  title = {Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique},
+  author = {Brault, Vincent and Channarond, Antoine and Robert, Valérie},
+  abstract = {The latent block model assumes there exists a distribution for each crossing between an object cluster and a variable cluster of a data table ; the cells are supposed to be independent conditionally to the choice of these clusters. To estimate the model parameters, most of algorithms are time consuming. Brault and Channarond (2016) proposed to adapt the Largest Gaps algorithm which consists in using the margins. They thus obtained a procedure which estimates all the model parameters consistently but requires a large number of observations. In this talk, we will extend the procedure to the case of any distribution having a second order moment by using an EM algorithm estimation.},
+  langid = {french},
+  keywords = {/unread},
+  annotation = {Read\_Status: New\\
+  Read\_Status\_Date: 2025-07-09T12:29:43.098Z},
+  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/GIUNC4L3/Brault et al. - Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique.pdf}
+}
+
+% The issue field is left out on purpose: the publisher URL reads issue-none, and styles that print the issue would otherwise show a literal "none".
+@article{channarondClassificationEstimationStochastic2012,
+  title = {Classification and Estimation in the {{Stochastic Blockmodel}} Based on the Empirical Degrees},
+  author = {Channarond, Antoine and Daudin, Jean-Jacques and Robin, Stéphane},
+  date = {2012-01-01},
+  journaltitle = {Electronic Journal of Statistics},
+  shortjournal = {Electron. J. Statist.},
+  volume = {6},
+  publisher = {Institute of Mathematical Statistics},
+  issn = {1935-7524},
+  doi = {10.1214/12-ejs753},
+  url = {https://projecteuclid.org/journals/electronic-journal-of-statistics/volume-6/issue-none/Classification-and-estimation-in-the-Stochastic-Blockmodel-based-on-the/10.1214/12-EJS753.full},
+  urldate = {2025-07-09},
+  abstract = {The Stochastic Blockmodel [16] is a mixture model for heterogeneous network data. Unlike the usual statistical framework, new nodes give additional information about the previous ones in this model. Thereby the distribution of the degrees concentrates in points conditionally on the node class. We show under a mild assumption that classification, estimation and model selection can actually be achieved with no more than the empirical degree data. We provide an algorithm able to process very large networks and consistent estimators based on it. In particular, we prove a bound of the probability of misclassification of at least one node, including when the number of classes grows.},
+  langid = {english},
+  keywords = {/unread},
+  annotation = {Read\_Status: New\\
+  Read\_Status\_Date: 2025-07-09T13:59:33.921Z},
+  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/8TL8AJ2G/Channarond et al. - 2012 - Classification and estimation in the Stochastic Blockmodel based on the empirical degrees.pdf}
+}