Louis/Thèse/these_ref.bib

@article{abbeCommunityDetectionStochastic2018,
  title = {Community {{Detection}} and {{Stochastic Block Models}}: {{Recent Developments}}},
  shorttitle = {Community {{Detection}} and {{Stochastic Block Models}}},
  author = {Abbe, Emmanuel},
  year = 2018,
  journal = {Journal of Machine Learning Research},
  volume = {18},
  number = {177},
  pages = {1--86},
  issn = {1533-7928},
  urldate = {2026-06-01},
  abstract = {The stochastic block model (SBM) is a random graph model with planted clusters. It is widely employed as a canonical model to study clustering and community detection, and provides generally a fertile ground to study the statistical and computational tradeoffs that arise in network and data sciences. This note surveys the recent developments that establish the fundamental limits for community detection in the SBM, both with respect to information-theoretic and computational thresholds, and for various recovery requirements such as exact, partial and weak recovery (a.k.a., detection). The main results discussed are the phase transitions for exact recovery at the Chernoff-Hellinger threshold, the phase transition for weak recovery at the Kesten-Stigum threshold, the optimal distortion-SNR tradeoff for partial recovery, the learning of the SBM parameters and the gap between information-theoretic and computational thresholds. The note also covers some of the algorithms developed in the quest of achieving the limits, in particular two-round algorithms via graph-splitting, semi-definite programming, linearized belief propagation, classical and nonbacktracking spectral methods. A few open problems are also discussed.},
  keywords = {/unread},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2026-06-01T14:16:02.103Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/7TP3R4KI/Abbe - 2018 - Community Detection and Stochastic Block Models Recent Developments.pdf}
}

@article{abdillIntegration168000Samples2025a,
  title = {Integration of 168,000 Samples Reveals Global Patterns of the Human Gut Microbiome},
  author = {Abdill, Richard J. and Graham, Samantha P. and Rubinetti, Vincent and Ahmadian, Mansooreh and Hicks, Parker and Chetty, Ashwin and McDonald, Daniel and Ferretti, Pamela and Gibbons, Elizabeth and Rossi, Marco and Krishnan, Arjun and Albert, Frank W. and Greene, Casey S. and Davis, Sean and Blekhman, Ran},
  year = 2025,
  month = feb,
  journal = {Cell},
  volume = {188},
  number = {4},
  pages = {1100--1118.e17},
  issn = {0092-8674},
  doi = {10.1016/j.cell.2024.12.017},
  urldate = {2025-05-05},
  abstract = {The factors shaping human microbiome variation are a major focus of biomedical research. While other fields have used large sequencing compendia to extract insights requiring otherwise impractical sample sizes, the microbiome field has lacked a comparably sized resource for the 16S rRNA gene amplicon sequencing commonly used to quantify microbiome composition. To address this gap, we processed 168,464 publicly available human gut microbiome samples with a uniform pipeline. We use this compendium to evaluate geographic and technical effects on microbiome variation. We find that regions such as Central and Southern Asia differ significantly from the more thoroughly characterized microbiomes of Europe and Northern America and that composition alone can be used to predict a sample's region of origin. We also find strong associations between microbiome variation and technical factors such as primers and DNA extraction. We anticipate this growing work, the Human Microbiome Compendium, will enable advanced applied and methodological research.},
  keywords = {16S rRNA amplicon sequencing,atlas,compendium,global variation,gut microbiome},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-05-05T07:37:00.292Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/QGRJP84V/Abdill et al. - 2025 - Integration of 168,000 samples reveals global patterns of the human gut microbiome.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/PVCAC23S/S0092867424014302.html}
}

@misc{abramovStructureKnowsBest,
  title = {Structure Knows Best: Predicting Ecological Interactions across Space through Pairwise Integration of Latent Network Patterns},
  author = {Abramov, Kesem and Biton, Barry and Galai, Geut and Puzis, Rami and Pilosof, Shai},
  langid = {english},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-12-01T08:50:29.812Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/DX44VXQA/Abramov et al. - Structure knows best predicting ecological interactions across space through pairwise integration o.pdf}
}

@misc{AccueilMIAParisSaclay,
  title        = {Accueil \textbar{} {{MIA Paris-Saclay}}},
  howpublished = {https://mia-ps.inrae.fr/},
  urldate      = {2023-07-03},
  file         = {/home/louis/snap/zotero-snap/common/Zotero/storage/I7FWTZC3/mia-ps.inrae.fr.html}
}

@incollection{AgglomerativeNestingProgram1990,
  title = {Agglomerative {{Nesting}} ({{Program AGNES}})},
  booktitle = {Finding {{Groups}} in {{Data}}},
  author = {Kaufman, Leonard and Rousseeuw, Peter J.},
  year = 1990,
  pages = {199--252},
  publisher = {John Wiley \& Sons, Ltd},
  doi = {10.1002/9780470316801.ch5},
  urldate = {2024-09-13},
  abstract = {The prelims comprise: Short Description of the Method How to Use the Program AGNES Examples More on the Algorithm and the Program Related Methods and References},
  chapter = {5},
  isbn = {978-0-470-31680-1},
  langid = {english},
  keywords = {agglomerative nesting,data set,dissimilarity matrix,graphical representations,interval-scaled variables},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/CVXNL7SP/1990 - Agglomerative Nesting (Program AGNES).pdf}
}

@article{agterbergJointSpectralClustering2025,
  title = {Joint {{Spectral Clustering}} in {{Multilayer Degree-Corrected Stochastic Blockmodels}}},
  author = {Agterberg, Joshua and Lubberts, Zachary and Arroyo, Jes{\'u}s},
  year = 2025,
  month = apr,
  journal = {Journal of the American Statistical Association},
  pages = {1--15},
  publisher = {ASA Website},
  issn = {0162-1459},
  doi = {10.1080/01621459.2025.2516201},
  urldate = {2025-09-19},
  abstract = {Modern network datasets are often composed of multiple layers, resulting in collections of networks over the same set of vertices but with potentially different connectivity patterns on each network. These data require models and methods that are flexible enough to capture local and global differences across the networks while at the same time being parsimonious and tractable to yield computationally efficient and theoretically sound solutions that are capable of aggregating information across the networks. This paper considers the multilayer degree-corrected stochastic blockmodel, where a collection of networks shares the same community structure, but degree corrections and block connection probability matrices are permitted to be different. We establish the identifiability of this model and propose a spectral clustering algorithm. Our theoretical results demonstrate that the misclustering error rate of the algorithm improves exponentially with multiple network realizations, even in the presence of significant layer heterogeneity. Simulation studies show that this approach improves on existing multilayer community detection methods in this challenging regime. Furthermore, in a case study of US airport data through January 2016 -- September 2021, we find that this methodology identifies meaningful community structure and trends in airport popularity influenced by pandemic impacts on travel. Supplementary materials for this article are available online, including a standardized description of the materials available for reproducing the work.},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-09-19T13:53:26.541Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/C82RAE8U/Agterberg et al. - Joint Spectral Clustering in Multilayer Degree-Corrected Stochastic Blockmodels.pdf}
}

@misc{aitchisonConciseGuideCompositionala,
  title = {A {{Concise Guide}} to {{Compositional Data Analysis}}},
  author = {Aitchison, John},
  langid = {english},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2026-05-07T13:07:55.461Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/SP7NE2I8/Aitchison - A Concise Guide to Compositional Data Analysis.pdf}
}

@article{aitchisonStatisticalAnalysisCompositional1982,
  title = {The {{Statistical Analysis}} of {{Compositional Data}}},
  author = {Aitchison, John},
  year = 1982,
  journal = {Journal of the Royal Statistical Society. Series B (Methodological)},
  volume = {44},
  number = {2},
  eprint = {2345821},
  eprinttype = {jstor},
  pages = {139--177},
  publisher = {[Royal Statistical Society, Oxford University Press]},
  issn = {0035-9246},
  urldate = {2025-05-07},
  abstract = {The simplex plays an important role as sample space in many practical situations where compositional data, in the form of proportions of some whole, require interpretation. It is argued that the statistical analysis of such data has proved difficult because of a lack both of concepts of independence and of rich enough parametric classes of distributions in the simplex. A variety of independence hypotheses are introduced and interrelated, and new classes of transformed-normal distributions in the simplex are provided as models within which the independence hypotheses can be tested through standard theory of parametric hypothesis testing. The new concepts and statistical methodology are illustrated by a number of applications.},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-05-07T07:43:11.308Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/S97URH4Y/Aitchison - 1982 - The Statistical Analysis of Compositional Data.pdf}
}

@misc{anakokBipartiteGraphVariational2024,
  author        = {Anakok, Emre and Barbillon, Pierre and Fontaine, Colin and Thebault, Elisa},
  title         = {Bipartite {{Graph Variational Auto-Encoder}} with {{Fair Latent Representation}} to {{Account}} for {{Sampling Bias}} in {{Ecological Networks}}},
  year          = 2024,
  month         = jul,
  eprint        = {2403.02011},
  archiveprefix = {arXiv},
  primaryclass  = {stat},
  number        = {arXiv:2403.02011},
  publisher     = {arXiv},
  doi           = {10.48550/arXiv.2403.02011},
  urldate       = {2025-09-21},
  keywords      = {Computer Science - Machine Learning,Computer Science - Social and Information Networks,Statistics - Machine Learning},
  abstract      = {We propose a method to represent bipartite networks using graph embeddings tailored to tackle the challenges of studying ecological networks, such as the ones linking plants and pollinators, where many covariates need to be accounted for, in particular to control for sampling bias. We adapt the variational graph auto-encoder approach to the bipartite case, which enables us to generate embeddings in a latent space where the two sets of nodes are positioned based on their probability of connection. We translate the fairness framework commonly considered in sociology in order to address sampling bias in ecology. By incorporating the Hilbert-Schmidt independence criterion (HSIC) as an additional penalty term in the loss we optimize, we ensure that the structure of the latent space is independent of continuous variables, which are related to the sampling process. Finally, we show how our approach can change our understanding of ecological networks when applied to the Spipoll data set, a citizen science monitoring program of plant-pollinator interactions to which many observers contribute, making it prone to sampling bias.},
  annotation    = {Read\_Status: New\\
Read\_Status\_Date: 2025-09-23T11:03:32.788Z},
  file          = {/home/louis/snap/zotero-snap/common/Zotero/storage/3VZTYZP4/Anakok et al. - 2024 - Bipartite Graph Variational Auto-Encoder with Fair Latent Representation to Account for Sampling Bia.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/RFL44JLY/2403.html}
}

@misc{anakokDisentanglingStructureEcological2022,
  title = {Disentangling the Structure of Ecological Bipartite Networks from Observation Processes},
  author = {Anakok, Emre and Barbillon, Pierre and Fontaine, Colin and Thebault, Elisa},
  year = 2022,
  month = nov,
  number = {arXiv:2211.16364},
  eprint = {2211.16364},
  primaryclass = {stat},
  publisher = {arXiv},
  doi = {10.48550/arXiv.2211.16364},
  urldate = {2023-06-14},
  abstract = {The structure of a bipartite interaction network can be described by providing a clustering for each of the two types of nodes. Such clusterings are outputted by fitting a Latent Block Model (LBM) on an observed network that comes from a sampling of species interactions in the field. However, the sampling is limited and possibly uneven. This may jeopardize the fit of the LBM and then the description of the structure of the network by detecting structures which result from the sampling and not from actual underlying ecological phenomena. If the observed interaction network consists of a weighted bipartite network where the number of observed interactions between two species is available, the sampling efforts for all species can be estimated and used to correct the LBM fit. We propose to combine an observation model that accounts for sampling and an LBM for describing the structure of underlying possible ecological interactions. We develop an original inference procedure for this model, the efficiency of which is demonstrated in simulation studies. The practical interest in ecology of our model is highlighted on a large dataset of plant-pollinator network.},
  archiveprefix = {arXiv},
  langid = {english},
  keywords = {Statistics - Methodology},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/LQ3FINZG/Anakok et al. - 2022 - Disentangling the structure of ecological bipartit.pdf}
}

@article{andersonNewMethodNonparametric2001,
  title = {A New Method for Non-parametric Multivariate Analysis of Variance},
  author = {Anderson, Marti J.},
  year = 2001,
  month = feb,
  journal = {Austral Ecology},
  volume = {26},
  number = {1},
  pages = {32--46},
  issn = {1442-9985, 1442-9993},
  doi = {10.1111/j.1442-9993.2001.01070.pp.x},
  urldate = {2025-11-10},
  abstract = {Hypothesis-testing methods for multivariate data are needed to make rigorous probability statements about the effects of factors and their interactions in experiments. Analysis of variance is particularly powerful for the analysis of univariate data. The traditional multivariate analogues, however, are too stringent in their assumptions for most ecological multivariate data sets. Non-parametric methods, based on permutation tests, are preferable. This paper describes a new non-parametric method for multivariate analysis of variance, after McArdle and Anderson (in press). It is given here, with several applications in ecology, to provide an alternative and perhaps more intuitive formulation for ANOVA (based on sums of squared distances) to complement the description provided by McArdle and Anderson (in press) for the analysis of any linear model. It is an improvement on previous non-parametric methods because it allows a direct additive partitioning of variation for complex models. It does this while maintaining the flexibility and lack of formal assumptions of other non-parametric methods. The test statistic is a multivariate analogue to Fisher's F-ratio and is calculated directly from any symmetric distance or dissimilarity matrix. P-values are then obtained using permutations. Some examples of the method are given for tests involving several factors, including factorial and hierarchical (nested) designs and tests of interactions.},
  langid = {english},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-11-10T09:24:58.855Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/QGZIKBQH/Anderson - 2001 - A new method for non‐parametric multivariate analysis of variance.pdf}
}

@incollection{Appendix1990,
  title = {Appendix},
  booktitle = {Finding {{Groups}} in {{Data}}},
  author = {Kaufman, Leonard and Rousseeuw, Peter J.},
  year = 1990,
  pages = {312--319},
  publisher = {John Wiley \& Sons, Ltd},
  doi = {10.1002/9780470316801.app1},
  urldate = {2024-09-13},
  isbn = {978-0-470-31680-1},
  langid = {english},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/RPETTM3Z/1990 - Appendix.pdf}
}

@article{arroyoInferenceMultipleHeterogeneous2021,
  title = {Inference for {{Multiple Heterogeneous Networks}} with a {{Common Invariant Subspace}}},
  author = {Arroyo, Jes{\'u}s and Athreya, Avanti and Cape, Joshua and Chen, Guodong and Priebe, Carey E. and Vogelstein, Joshua T.},
  year = 2021,
  journal = {Journal of Machine Learning Research},
  volume = {22},
  number = {142},
  pages = {1--49},
  issn = {1533-7928},
  url = {http://jmlr.org/papers/v22/19-558.html},
  urldate = {2025-09-19},
  abstract = {The development of models and methodology for the analysis of data from multiple heterogeneous networks is of importance both in statistical network theory and across a wide spectrum of application domains. Although single-graph analysis is well-studied, multiple graph inference is largely unexplored, in part because of the challenges inherent in appropriately modeling graph differences and yet retaining sufficient model simplicity to render estimation feasible. This paper addresses exactly this gap, by introducing a new model, the common subspace independent-edge multiple random graph model, which describes a heterogeneous collection of networks with a shared latent structure on the vertices but potentially different connectivity patterns for each graph. The model encompasses many popular network representations, including the stochastic blockmodel. The model is both flexible enough to meaningfully account for important graph differences, and tractable enough to allow for accurate inference in multiple networks. In particular, a joint spectral embedding of adjacency matrices---the multiple adjacency spectral embedding---leads to simultaneous consistent estimation of underlying parameters for each graph. Under mild additional assumptions, the estimates satisfy asymptotic normality and yield improvements for graph eigenvalue estimation. In both simulated and real data, the model and the embedding can be deployed for a number of subsequent network inference tasks, including dimensionality reduction, classification, hypothesis testing, and community detection. Specifically, when the embedding is applied to a data set of connectomes constructed through diffusion magnetic resonance imaging, the result is an accurate classification of brain scans by human subject and a meaningful determination of heterogeneity across scans of different individuals.},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-09-19T14:02:30.452Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/DAGYGPA3/Arroyo et al. - 2021 - Inference for Multiple Heterogeneous Networks with a Common Invariant Subspace.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/23CUVTKH/19-558.html;/home/louis/snap/zotero-snap/common/Zotero/storage/RPJ3SJGW/19-558.html}
}

@article{aubertModelbasedBiclusteringOverdispersed2021,
  title = {Model-Based Biclustering for Overdispersed Count Data with Application in Microbial Ecology},
  author = {Aubert, Julie and Schbath, Sophie and Robin, St{\'e}phane},
  year = 2021,
  journal = {Methods in Ecology and Evolution},
  volume = {12},
  number = {6},
  pages = {1050--1061},
  issn = {2041-210X},
  doi = {10.1111/2041-210X.13582},
  urldate = {2023-06-22},
  abstract = {Different studies have shown that microbial communities living in animals (humans included), in or around plants have a significant impact on health and disease of their host and on various services, such as adaptation under stressing environment. The basic input data to study microbiomes is a matrix representing abundance data of micro-organisms across different sampling units. Such a matrix typically corresponds to taxonomic profiles derived from the high-throughput sequencing of environmental samples. Biclustering is one way to study the interactions between the structure of micro-organism communities and the environmental samples they come from. We propose a latent block model (LBM) and an associated inference procedure for the biclustering of rows and columns of abundance matrices. The LBM assumes that micro-organisms (rows) and environmental samples (columns) can both be clustered into groups characterizing preferential interaction or avoidance. We use the Poisson--Gamma distribution to model the overdispersion observed in microbial abundance data and introduce row and column effects to account for the sequencing effort in each sample and the mean abundance of each micro-organism. Because the latent variables are not independent conditionally on the observed ones, classical maximum likelihood inference is intractable. We then derive a variational-based inference algorithm and propose a strategy to select the number of biclusters. We illustrate the flexibility and performance of our approach both on a simulation study and on three ecological datasets. The model-based framework allows us to adapt to peculiarities of microbial ecological abundance data and allows us to explore relationships between entities of two different natures. We implemented our method in the cobiclust R package available on the CRAN and built a website with example of usage (https://julieaubert.github.io/cobiclust/cobiclust-example1.html).},
  copyright = {\copyright{} 2021 British Ecological Society},
  langid = {english},
  keywords = {count data,latent block model,metabarcoding,microbial interactions,model-based biclustering,Poisson-Gamma distribution,variational EM algorithm},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/A4V9MJAF/Aubert et al. - 2021 - Model-based biclustering for overdispersed count d.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/CAI8Y2NJ/Wills et Meyer - 2020 - Metrics for graph comparison A practitioner’s guide.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/GWSQVDNI/Aubert et al. - 2021 - Model‐based biclustering for overdispersed count data with application in microbial ecology.pdf}
}

@incollection{aubertUsingLatentBlock2022,
  title = {Using {{Latent Block Models}} to {{Detect Structure}} in {{Ecological Networks}}},
  booktitle = {Statistical {{Approaches}} for {{Hidden Variables}} in {{Ecology}}},
  author = {Aubert, Julie and Barbillon, Pierre and Donnet, Sophie and Miele, Vincent},
  editor = {Peyrard, Nathalie and Gimenez, Olivier},
  year = 2022,
  publisher = {John Wiley \& Sons, Ltd},
  doi = {10.1002/9781119902799.ch6},
  urldate = {2026-05-21},
  chapter = {6},
  keywords = {/unread},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2026-05-21T07:25:50.730Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/3YDE3C5L/Aubert et al. - 2022 - Using Latent Block Models to Detect Structure in Ecological Networks.pdf}
}

@incollection{AuthorIndex1990,
  title = {Author {{Index}}},
  booktitle = {Finding {{Groups}} in {{Data}}},
  author = {Kaufman, Leonard and Rousseeuw, Peter J.},
  year = 1990,
  pages = {322--335},
  publisher = {John Wiley \& Sons, Ltd},
  doi = {10.1002/9780470316801.indauth},
  urldate = {2024-09-13},
  isbn = {978-0-470-31680-1},
  langid = {english},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/S9F7YWH4/1990 - Author Index.pdf}
}

@misc{AutoencodeurVariationnel2024,
  title = {{Auto-encodeur variationnel}},
  year = 2024,
  month = mar,
  howpublished = {Wikip{\'e}dia},
  url = {https://fr.wikipedia.org/wiki/Auto-encodeur_variationnel},
  urldate = {2024-05-21},
  abstract = {En apprentissage automatique, un auto-encodeur variationnel (ou VAE de l'anglais variational auto encoder), est une architecture de r\'eseau de neurones artificiels introduite en 2013 par D. Kingma et M. Welling, appartenant aux familles des mod\`eles graphiques probabilistes et des m\'ethodes bay\'esiennes variationnelles. Les VAE sont souvent rapproch\'es des autoencodeurs, en raison de leur architectures similaires. Leur utilisation et leur formulation math\'ematiques sont cependant diff\'erentes. Les auto-encodeurs variationnels permettent de formuler un probl\`eme d'inf\'erence statistique (par exemple, d\'eduire la valeur d'une variable al\'eatoire \`a partir d'une autre variable al\'eatoire) en un probl\`eme d'optimisation statistique (c'est-\`a-dire trouver les valeurs de param\`etres qui minimisent une fonction objectif). Ils repr\'esentent une fonction associant \`a une valeur d'entr\'ee une distribution latente multivari\'ee, qui n'est pas directement observ\'ee mais d\'eduite depuis un mod\`ele math\'ematique \`a partir de la distribution d'autres variables. Bien que ce type de mod\`ele ait \'et\'e initialement con\c cu pour l'apprentissage non supervis\'e, son efficacit\'e a \'et\'e prouv\'ee pour l'apprentissage semi-supervis\'e, et l'apprentissage supervis\'e.},
  copyright = {Creative Commons Attribution-ShareAlike License},
  langid = {french},
  annotation = {Page Version ID: 213326719},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/HLPG5K7Y/Auto-encodeur_variationnel.html}
}

@article{baldockDailyTemporalStructure2011,
  author    = {Baldock, Katherine C. R. and Memmott, Jane and {Ruiz-Guajardo}, Juan Carlos and Roze, Denis and Stone, Graham N.},
  title     = {Daily Temporal Structure in {{African}} Savanna Flower Visitation Networks and Consequences for Network Sampling},
  journal   = {Ecology},
  year      = 2011,
  volume    = {92},
  number    = {3},
  pages     = {687--698},
  issn      = {1939-9170},
  doi       = {10.1890/10-1110.1},
  urldate   = {2024-07-02},
  langid    = {english},
  copyright = {\copyright{} 2011 by the Ecological Society of America},
  keywords  = {Africa,competition,ecological networks,facilitation,Kenya,mutualism,pollination,savanna,temporal structure,visitation webs},
  abstract  = {Ecological interaction networks are a valuable approach to understanding plant--pollinator interactions at the community level. Highly structured daily activity patterns are a feature of the biology of many flower visitors, particularly provisioning female bees, which often visit different floral sources at different times. Such temporal structure implies that presence/absence and relative abundance of specific flower--visitor interactions (links) in interaction networks may be highly sensitive to the daily timing of data collection. Further, relative timing of interactions is central to their possible role in competition or facilitation of seed set among coflowering plants sharing pollinators. To date, however, no study has examined the network impacts of daily temporal variation in visitor activity at a community scale. Here we use temporally structured sampling to examine the consequences of daily activity patterns upon network properties using fully quantified flower--visitor interaction data for a Kenyan savanna habitat. Interactions were sampled at four sequential three-hour time intervals between 06:00 and 18:00, across multiple seasonal time points for two sampling sites. In all data sets the richness and relative abundance of links depended critically on when during the day visitation was observed. Permutation-based null modeling revealed significant temporal structure across daily time intervals at three of the four seasonal time points, driven primarily by patterns in bee activity. This sensitivity of network structure shows the need to consider daily time in network sampling design, both to maximize the probability of sampling links relevant to plant reproductive success and to facilitate appropriate interpretation of interspecific relationships. 
Our data also suggest that daily structuring at a community level could reduce indirect competitive interactions when coflowering plants share pollinators, as is commonly observed during flowering in highly seasonal habitats.},
  file      = {/home/louis/snap/zotero-snap/common/Zotero/storage/4ALS9Y6W/10-1110.1.pdf.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/4YSLVYC5/Baldock et al. - 2011 - Daily temporal structure in African savanna flower.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/44J286I7/10-1110.html;/home/louis/snap/zotero-snap/common/Zotero/storage/7PEDTWU9/10-1110.html}
}

@article{baldockSystemsApproachReveals2019,
  title = {A Systems Approach Reveals Urban Pollinator Hotspots and Conservation Opportunities},
  author = {Baldock, Katherine C. R. and Goddard, Mark A. and Hicks, Damien M. and Kunin, William E. and Mitschunas, Nadine and Morse, Helen and Osgathorpe, Lynne M. and Potts, Simon G. and Robertson, Kirsty M. and Scott, Anna V. and Staniczenko, Phillip P. A. and Stone, Graham N. and Vaughan, Ian P. and Memmott, Jane},
  year = 2019,
  month = mar,
  journal = {Nature Ecology \& Evolution},
  volume = {3},
  number = {3},
  pages = {363--373},
  publisher = {Nature Publishing Group},
  issn = {2397-334X},
  doi = {10.1038/s41559-018-0769-y},
  urldate = {2024-06-25},
  abstract = {Urban areas are often perceived to have lower biodiversity than the wider countryside, but a few small-scale studies suggest that some urban land uses can support substantial pollinator populations. We present a large-scale, well-replicated study of floral resources and pollinators in 360 sites incorporating all major land uses in four British cities. Using a systems approach, we developed Bayesian network models integrating pollinator dispersal and resource switching to estimate city-scale effects of management interventions on plant--pollinator community robustness to species loss. We show that residential gardens and allotments (community gardens) are pollinator `hotspots': gardens due to their extensive area, and allotments due to their high pollinator diversity and leverage on city-scale plant--pollinator community robustness. Household income was positively associated with pollinator abundance in gardens, highlighting the influence of socioeconomic factors. Our results underpin urban planning recommendations to enhance pollinator conservation, using increasing city-scale community robustness as our measure of success.},
  copyright = {2019 The Author(s), under exclusive licence to Springer Nature Limited},
  langid = {english},
  keywords = {Bayes Theorem,Biodiversity,Cities,Conservation of Natural Resources,Ecological networks,Ecosystem services,England,Pollination,Robustness,Scotland,Systems Analysis,Urban ecology},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/EUG7U3L4/s41559-018-0769-y.pdf.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/T5BAU3P8/Baldock et al. - 2019 - A systems approach reveals urban pollinator hotspo.pdf}
}

@misc{barberoSabinaHSBMPackageLink,
  title = {{{sabinaHSBM}}: {{An R}} Package for Link Prediction Network Reconstruction Using {{Hierarchical Stochastic Block Models}}},
  author = {Morales Barbero, Jennifer},
  langid = {english},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-11-03T12:31:15.098Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/6BWFTN4Q/Barbero - sabinaHSBM An R package for link prediction network reconstruction using Hierarchical Stochastic Bl.pdf}
}

@misc{barbillonSciencesDonneesApprentissage,
  title = {{Sciences des donn\'ees : apprentissage statistique}},
  author = {Barbillon, Pierre},
  langid = {french},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/YI2QU6FD/Barbillon - Sciences des données  apprentissage statistique.pdf}
}

@article{bascompteNestedAssemblyPlant2003,
  author     = {Bascompte, Jordi and Jordano, Pedro and Meli{\'a}n, Carlos J. and Olesen, Jens M.},
  title      = {The Nested Assembly of Plant--Animal Mutualistic Networks},
  journal    = {Proceedings of the National Academy of Sciences},
  year       = 2003,
  month      = aug,
  volume     = {100},
  number     = {16},
  pages      = {9383--9387},
  publisher  = {Proceedings of the National Academy of Sciences},
  doi        = {10.1073/pnas.1633576100},
  urldate    = {2026-06-10},
  abstract   = {Most studies of plant--animal mutualisms involve a small number of species. There is almost no information on the structural organization of species-rich mutualistic networks despite its potential importance for the maintenance of diversity. Here we analyze 52 mutualistic networks and show that they are highly nested; that is, the more specialist species interact only with proper subsets of those species interacting with the more generalists. This assembly pattern generates highly asymmetrical interactions and organizes the community cohesively around a central core of interactions. Thus, mutualistic networks are neither randomly assembled nor organized in compartments arising from tight, parallel specialization. Furthermore, nestedness increases with the complexity (number of interactions) of the network: for a given number of species, communities with more interactions are significantly more nested. Our results indicate a nonrandom pattern of community organization that may be relevant for our understanding of the organization and persistence of biodiversity.},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2026-06-10T15:10:14.000Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/MHN9EMQR/Bascompte et al. - 2003 - The nested assembly of plant–animal mutualistic networks.pdf}
}
@article{bashanUniversalityHumanMicrobial2016a,
  title = {Universality of {{Human Microbial Dynamics}}},
  author = {Bashan, Amir and Gibson, Travis E. and Friedman, Jonathan and Carey, Vincent J. and Weiss, Scott T. and Hohmann, Elizabeth L. and Liu, Yang-Yu},
  year = 2016,
  month = jun,
  journal = {Nature},
  volume = {534},
  number = {7606},
  pages = {259--262},
  issn = {0028-0836},
  doi = {10.1038/nature18301},
  urldate = {2025-05-05},
  abstract = {The recent realization that human-associated microbial communities play a crucial role in determining our health and well-being, has led to the ongoing development of microbiome-based therapies such as fecal microbiota transplantation. Those microbial communities are very complex, dynamic and highly personalized ecosystems, exhibiting a high degree of inter-individual variability in both species assemblages and abundance profiles. It is not known whether the underlying ecological dynamics, which can be parameterized by growth rates, intra- and inter-species interactions in population dynamics models, are largely host-independent (i.e. ``universal'') or host-specific. If the inter-individual variability reflects host-specific dynamics due to differences in host lifestyle, physiology, or genetics, then generic microbiome manipulations may have unintended consequences, rendering them ineffectual or even detrimental. Alternatively, microbial ecosystems of different subjects may follow a universal dynamics with the inter-individual variability mainly stemming from differences in the sets of colonizing species. Here we developed a novel computational method to characterize human microbial dynamics. Applying this method to cross-sectional data from two large-scale metagenomic studies, the Human Microbiome Project, and the Student Microbiome Project, we found that both gut and mouth microbiomes display pronounced universal dynamics, whereas communities associated with certain skin sites are likely shaped by differences in the host environment. Interestingly, the universality of gut microbial dynamics is not observed in subjects with recurrent Clostridium difficile infection but is observed in the same set of subjects after fecal microbiota transplantation. These results fundamentally improve our understanding of forces and processes shaping human microbial ecosystems, paving the way to design general microbiome-based therapies.},
  pmcid = {PMC4902290},
  pmid = {27279224},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-05-05T15:33:24.405Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/VPKQISHV/Bashan et al. - 2016 - Universality of Human Microbial Dynamics.pdf}
}

@misc{battagliaRelationalInductiveBiases2018,
  title = {Relational Inductive Biases, Deep Learning, and Graph Networks},
  author = {Battaglia, Peter W. and Hamrick, Jessica B. and Bapst, Victor and {Sanchez-Gonzalez}, Alvaro and Zambaldi, Vinicius and Malinowski, Mateusz and Tacchetti, Andrea and Raposo, David and Santoro, Adam and Faulkner, Ryan and Gulcehre, Caglar and Song, Francis and Ballard, Andrew and Gilmer, Justin and Dahl, George and Vaswani, Ashish and Allen, Kelsey and Nash, Charles and Langston, Victoria and Dyer, Chris and Heess, Nicolas and Wierstra, Daan and Kohli, Pushmeet and Botvinick, Matt and Vinyals, Oriol and Li, Yujia and Pascanu, Razvan},
  year = 2018,
  month = oct,
  number = {arXiv:1806.01261},
  eprint = {1806.01261},
  primaryclass = {cs, stat},
  publisher = {arXiv},
  doi = {10.48550/arXiv.1806.01261},
  urldate = {2024-05-15},
  abstract = {Artificial intelligence (AI) has undergone a renaissance recently, making major progress in key domains such as vision, language, control, and decision-making. This has been due, in part, to cheap data and cheap compute resources, which have fit the natural strengths of deep learning. However, many defining characteristics of human intelligence, which developed under much different pressures, remain out of reach for current approaches. In particular, generalizing beyond one's experiences---a hallmark of human intelligence from infancy---remains a formidable challenge for modern AI. The following is part position paper, part review, and part unification. We argue that combinatorial generalization must be a top priority for AI to achieve human-like abilities, and that structured representations and computations are key to realizing this objective. Just as biology uses nature and nurture cooperatively, we reject the false choice between ``hand-engineering'' and ``end-to-end'' learning, and instead advocate for an approach which benefits from their complementary strengths. We explore how using relational inductive biases within deep learning architectures can facilitate learning about entities, relations, and rules for composing them. We present a new building block for the AI toolkit with a strong relational inductive bias---the graph network---which generalizes and extends various approaches for neural networks that operate on graphs, and provides a straightforward interface for manipulating structured knowledge and producing structured behaviors. We discuss how graph networks can support relational reasoning and combinatorial generalization, laying the foundation for more sophisticated, interpretable, and flexible patterns of reasoning. As a companion to this paper, we have released an open-source software library for building graph networks, with demonstrations of how to use them in practice.},
  archiveprefix = {arXiv},
  keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning,Statistics - Machine Learning},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/98Z2MFJP/Battaglia et al. - 2018 - Relational inductive biases, deep learning, and gr.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/FIUI8TKL/1806.html}
}

@misc{battistonHierarchicalStochasticBlock2024,
  title = {The {{Hierarchical Stochastic Block Model}} for {{Multiple Networks}}},
  author = {Battiston, Marco and Lee, Clement},
  year = 2024,
  month = jul,
  doi = {10.21203/rs.3.rs-4601684/v1},
  urldate = {2024-07-08},
  abstract = {In many research fields, there is an increased availability of network data arising as multiple networks. However, most statistical models for network data in the literature are designed for a single network. Among these, the Stochastic Block Model is arguably the most popular model to perform vertex clustering and community detection. We propose the Hierarchical Stochastic Block Model, a generalization of the SBM to the setting of multiple networks. This model uses a Hierarchical Pitman-Yor prior for the block allocation vector of each graph. The proposed model has two main advantages: 1) it allows different networks to share the same latent blocks and the level of sharing is learnt from the data; 2) the number of blocks in each graph and the overall number of blocks are learnt from the data too, hence avoiding complicated model selection procedures. We derive both MCMC and Variational Inference algorithms. The former targets the correct posterior and is tuning-free, while the latter relies on an approximation of the posterior distribution, but is potentially more scalable than MCMC. We apply the HSBM to a co-authorship network and a brain connectomic network, to illustrate how the model is able to capture different levels of block sharing.},
  copyright = {https://creativecommons.org/licenses/by/4.0/},
  langid = {english},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/AYFZ2ENG/Battiston et Lee - 2024 - The Hierarchical Stochastic Block Model for Multip.pdf}
}

@misc{beauguitteLanalyseGraphesBipartis,
  title = {{L'analyse des graphes bipartis}},
  author = {Beauguitte, Laurent},
  langid = {french},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/JN3DD4XS/Beauguitte - L'analyse des graphes bipartis.pdf}
}

@article{beaumontApproximateBayesianComputation2002,
  title = {Approximate {{Bayesian Computation}} in {{Population Genetics}}},
  author = {Beaumont, Mark A. and Zhang, Wenyang and Balding, David J.},
  year = 2002,
  month = dec,
  journal = {Genetics},
  volume = {162},
  number = {4},
  pages = {2025--2035},
  issn = {1943-2631},
  doi = {10.1093/genetics/162.4.2025},
  urldate = {2026-05-15},
  abstract = {We propose a new method for approximate Bayesian statistical inference on the basis of summary statistics. The method is suited to complex problems that arise in population genetics, extending ideas developed in this setting by earlier authors. Properties of the posterior distribution of a parameter, such as its mean or density curve, are approximated without explicit likelihood calculations. This is achieved by fitting a local-linear regression of simulated parameter values on simulated summary statistics, and then substituting the observed summary statistics into the regression equation. The method combines many of the advantages of Bayesian statistical inference with the computational efficiency of methods based on summary statistics. A key advantage of the method is that the nuisance parameters are automatically integrated out in the simulation step, so that the large numbers of nuisance parameters that arise in population genetics problems can be handled without difficulty. Simulation results indicate computational and statistical efficiency that compares favorably with those of alternative methods previously proposed in the literature. We also compare the relative efficiency of inferences obtained using methods based on summary statistics with those obtained directly from the data using MCMC.},
  keywords = {/unread},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2026-05-15T15:42:56.407Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/PQUKXEI2/Beaumont et al. - 2002 - Approximate Bayesian Computation in Population Genetics.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/EULY93WM/162.4.html}
}

@article{beaumontApproximateBayesianComputation2010a,
  title = {Approximate {{Bayesian Computation}} in {{Evolution}} and {{Ecology}}},
  author = {Beaumont, Mark A.},
  year = 2010,
  month = dec,
  journal = {Annual Review of Ecology, Evolution, and Systematics},
  volume = {41},
  number = {1},
  pages = {379--406},
  issn = {1543-592X, 1545-2069},
  doi = {10.1146/annurev-ecolsys-102209-144621},
  urldate = {2026-05-13},
  abstract = {In the past 10 years a statistical technique, approximate Bayesian computation (ABC), has been developed that can be used to infer parameters and choose between models in the complicated scenarios that are often considered in the environmental sciences. For example, based on gene sequence and microsatellite data, the method has been used to choose between competing models of human demographic history as well as to infer growth rates, times of divergence, and other parameters. The method fits naturally in the Bayesian inferential framework, and a brief overview is given of the key concepts. Three main approaches to ABC have been developed, and these are described and compared. Although the method arose in population genetics, ABC is increasingly used in other fields, including epidemiology, systems biology, ecology, and agent-based modeling, and many of these applications are briefly described.},
  langid = {english},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2026-05-13T14:01:45.947Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/TEJMMB4Y/Beaumont - 2010 - Approximate Bayesian Computation in Evolution and Ecology.pdf}
}

@article{becoche-mosqueraUnravelingPlantpollinatorInteractions2023,
  title = {Unraveling Plant-Pollinator Interactions from a South-West {{Andean}} Forest in {{Colombia}}},
  author = {{Becoche-Mosquera}, Jorge Mario and {Gomez-Bernal}, Luis German and {Zambrano-Gonzalez}, Giselle and {Angulo-Ortiz}, David},
  year = 2023,
  month = nov,
  journal = {PeerJ},
  volume = {11},
  pages = {e16133},
  publisher = {PeerJ Inc.},
  issn = {2167-8359},
  doi = {10.7717/peerj.16133},
  urldate = {2025-09-21},
  abstract = {Background Loss of biological connectivity increases the vulnerability of ecological dynamics, thereby affecting processes such as pollination. Therefore, it is important to understand the roles of the actors that participate in these interaction networks. Nonetheless, there is a significant oversight regarding the main actors in the pollination networks within the highly biodiverse forests of Colombia. Hence, the present study aims to evaluate the interaction patterns of a network of potential pollinators that inhabit an Andean Forest in Totor\'o, Cauca, Colombia. Methods The interactions between plants and potential pollinators were recorded through direct observation in 10 transects during six field trips conducted over the course of one year. Subsequently, an interaction matrix was developed, and network metrics such as connectance, specialization, nestedness, and asymmetry of interaction strength were evaluated by applying null models. An interpolation/extrapolation curve was calculated in order to assess the representativeness of the sample. Finally, the key species of the network were identified by considering degree (k), centrality, and betweenness centrality. Results A total of 53 plant species and 52 potential pollinator species (including insects and birds) were recorded, with a sample coverage of 88.5\%. Connectance (C = 0.19) and specialization (H2' = 0.19) were low, indicating a generalist network. Freziera canescens, Gaiadendron punctatum, Persea mutisii, Bombus rubicundus, Heliangelus exortis, Chironomus sp., and Metallura tyrianthina were identified as the key species that contribute to a more cohesive network structure. Discussion The present study characterized the structure of the plant-pollinator network in a highly diverse Andean forest in Colombia. 
It is evident that insects are the largest group of pollinators; however, it is interesting to note that birds form a different module that specializes in pollinating a specific group of plants. On the other hand, the diversity and generality of the species found suggest that the network may be robust against chains of extinction. Nevertheless, the presence of certain introduced species, such as Apis mellifera, and the rapid changes in vegetation cover may affect the dynamics of this mutualistic network. So, it is imperative to apply restoration and conservation strategies to these ecosystems in order to enhance plant-animal interactions and prevent the loss of taxonomical and functional diversity.},
  langid = {english},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-09-23T11:03:33.322Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/PTL7Z3TV/Becoche-Mosquera et al. - 2023 - Unraveling plant-pollinator interactions from a south-west Andean forest in Colombia.pdf}
}

@article{bickelNonparametricViewNetwork2009,
  title = {A Nonparametric View of Network Models and {{Newman}}--{{Girvan}} and Other Modularities},
  author = {Bickel, Peter J. and Chen, Aiyou},
  year = 2009,
  month = dec,
  journal = {Proceedings of the National Academy of Sciences},
  volume = {106},
  number = {50},
  pages = {21068--21073},
  issn = {0027-8424, 1091-6490},
  doi = {10.1073/pnas.0907096106},
  urldate = {2024-11-22},
  abstract = {Prompted by the increasing interest in networks in many fields, we present an attempt at unifying points of view and analyses of these objects coming from the social sciences, statistics, probability and physics communities. We apply our approach to the Newman--Girvan modularity, widely used for ``community'' detection, among others. Our analysis is asymptotic but we show by simulation and application to real examples that the theory is a reasonable guide to practice.},
  langid = {english},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/VFL87V9L/Bickel et Chen - 2009 - A nonparametric view of network models and Newman–Girvan and other modularities.pdf}
}

@article{biernackiAssessingMixtureModel2000,
  title = {Assessing a Mixture Model for Clustering with the Integrated Completed Likelihood},
  author = {Biernacki, Christophe and Celeux, Gilles and Govaert, G{\'e}rard},
  year = 2000,
  month = jul,
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume = {22},
  number = {7},
  pages = {719--725},
  issn = {1939-3539},
  doi = {10.1109/34.865189},
  abstract = {We propose an assessing method of mixture model in a cluster analysis setting with integrated completed likelihood. For this purpose, the observed data are assigned to unknown clusters using a maximum a posteriori operator. Then, the integrated completed likelihood (ICL) is approximated using the Bayesian information criterion (BIC). Numerical experiments on simulated and real data of the resulting ICL criterion show that it performs well both for choosing a mixture model and a relevant number of clusters. In particular, ICL appears to be more robust than BIC to violation of some of the mixture model assumptions and it can select a number of clusters leading to a sensible partitioning of the data.},
  keywords = {Bayesian methods,Context modeling,Gaussian distribution,Numerical simulation,Probability distribution,Robustness},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/MK9H446U/Biernacki et al. - 2000 - Assessing a mixture model for clustering with the .pdf}
}

@misc{blondelAitchisonGeometrySimplex2026,
  title = {Aitchison {{Geometry}} on the {{Simplex}} for {{Uncertainty Quantification}} in {{Bayesian Hyperspectral Image Unmixing}}},
  author = {Blondel, Hector and Drumetz, Lucas and Chonavel, Thierry},
  year = 2026,
  month = mar,
  number = {arXiv:2603.24108},
  eprint = {2603.24108},
  primaryclass = {stat},
  publisher = {arXiv},
  doi = {10.48550/arXiv.2603.24108},
  urldate = {2026-04-12},
  abstract = {Most algorithms for hyperspectral image unmixing produce point estimates of fractional abundances of the materials to be separated. However, in the absence of reliable ground truth, the ability to perform abundance uncertainty quantification (UQ) should be an important feature of algorithms, e.g. to evaluate how hard the unmixing problem is and how much the results should be trusted. The usual modeling assumptions in Bayesian unmixing rely heavily on the Euclidean geometry of the simplex and typically disregard spatial information. In addition, to our knowledge, abundance UQ is close to nonexistent in the literature. In this paper, we propose to leverage Aitchison geometry used in compositional data analysis to provide practitioners with alternative tools for modeling prior abundance distributions. In particular, we show how to design simplex-valued Gaussian Process priors using this geometry. Then we link Aitchison geometry to constrained optimization and sampling algorithms, and propose UQ diagnostics that comply with the constraints on abundance vectors. We illustrate these concepts on real and simulated data.},
  archiveprefix = {arXiv},
  langid = {english},
  keywords = {Electrical Engineering and Systems Science - Signal Processing,Statistics - Methodology},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2026-04-12T17:36:21.854Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/XT9NVRVI/Blondel et al. - 2026 - Aitchison Geometry on the Simplex for Uncertainty Quantification in Bayesian Hyperspectral Image Unm.pdf}
}

@misc{boraCompressedSensingUsing2017,
  title = {Compressed {{Sensing}} Using {{Generative Models}}},
  author = {Bora, Ashish and Jalal, Ajil and Price, Eric and Dimakis, Alexandros G.},
  year = 2017,
  month = mar,
  number = {arXiv:1703.03208},
  eprint = {1703.03208},
  archiveprefix = {arXiv},
  publisher = {arXiv},
  doi = {10.48550/arXiv.1703.03208},
  urldate = {2026-05-28},
  abstract = {The goal of compressed sensing is to estimate a vector from an underdetermined system of noisy linear measurements, by making use of prior knowledge on the structure of vectors in the relevant domain. For almost all results in this literature, the structure is represented by sparsity in a well-chosen basis. We show how to achieve guarantees similar to standard compressed sensing but without employing sparsity at all. Instead, we suppose that vectors lie near the range of a generative model \$G: \textbackslash mathbb\textbraceleft R\textbraceright\textasciicircum k \textbackslash to \textbackslash mathbb\textbraceleft R\textbraceright\textasciicircum n\$. Our main theorem is that, if \$G\$ is \$L\$-Lipschitz, then roughly \$O(k \textbackslash log L)\$ random Gaussian measurements suffice for an \$\textbackslash ell\_2/\textbackslash ell\_2\$ recovery guarantee. We demonstrate our results using generative models from published variational autoencoder and generative adversarial networks. Our method can use \$5\$-\$10\$x fewer measurements than Lasso for the same accuracy.},
  langid = {english},
  keywords = {/unread},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2026-05-28T12:25:27.680Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/6XWQI5ND/Bora et al. - 2017 - Compressed Sensing using Generative Models.pdf}
}

@article{boschPlantPollinatorNetworks2009,
  title = {Plant--Pollinator Networks: Adding the Pollinator's Perspective},
  shorttitle = {Plant--Pollinator Networks},
  author = {Bosch, Jordi and Mart{\'i}n Gonz{\'a}lez, Ana M. and Rodrigo, Anselm and Navarro, David},
  year = 2009,
  journal = {Ecology Letters},
  volume = {12},
  number = {5},
  pages = {409--419},
  issn = {1461-0248},
  doi = {10.1111/j.1461-0248.2009.01296.x},
  urldate = {2024-08-20},
  abstract = {Pollination network studies are based on pollinator surveys conducted on focal plants. This plant-centred approach provides insufficient information on flower visitation habits of rare pollinator species, which are the majority in pollinator communities. As a result, pollination networks contain very high proportions of pollinator species linked to a single plant species (extreme specialists), a pattern that contrasts with the widely accepted view that plant--pollinator interactions are mostly generalized. In this study of a Mediterranean scrubland community in NE Spain we supplement data from an intensive field survey with the analysis of pollen loads carried by pollinators. We observed 4265 contacts corresponding to 19 plant and 122 pollinator species. The addition of pollen data unveiled a very significant number of interactions, resulting in important network structural changes. Connectance increased 1.43-fold, mean plant connectivity went from 18.5 to 26.4, and mean pollinator connectivity from 2.9 to 4.1. Extreme specialist pollinator species decreased 0.6-fold, suggesting that ecological specialization is often overestimated in plant--pollinator networks. We expected a greater connectivity increase in rare species, and consequently a decrease in the level of asymmetric specialization. However, new links preferentially attached to already highly connected nodes and, as a result, both nestedness and centralization increased. The addition of pollen data revealed the existence of four clearly defined modules that were not apparent when only field survey data were used. Three of these modules had a strong phenological component. In comparison to other pollination webs, our network had a high proportion of connector links and species. That is, although significant, the four modules were far from isolated.},
  copyright = {\copyright{} 2009 Blackwell Publishing Ltd/CNRS},
  langid = {english},
  keywords = {Apparent specialization,coevolution,generalization,modularity,nestedness,plant-pollinator interactions,pollen analysis,pollination web,sampling effort},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/C5TQ6Y49/Bosch et al. - 2009 - Plant–pollinator networks adding the pollinator’s perspective.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/BHMVU3DU/j.1461-0248.2009.01296.html}
}

@article{botellaAppraisalGraphEmbeddings2022,
  title = {An Appraisal of Graph Embeddings for Comparing Trophic Network Architectures},
  author = {Botella, Christophe and Dray, St{\'e}phane and Matias, Catherine and Miele, Vincent and Thuiller, Wilfried},
  year = 2022,
  journal = {Methods in Ecology and Evolution},
  volume = {13},
  number = {1},
  pages = {203--216},
  issn = {2041-210X},
  doi = {10.1111/2041-210X.13738},
  urldate = {2024-05-14},
  abstract = {Comparing the architecture of interaction networks in space or time is essential for understanding the assembly, trajectory, functioning and persistence of species communities. Graph embedding methods, which position networks into a vector space where nearby networks have similar architectures, could be ideal tools for this purposes. Here, we evaluated the ability of seven graph embedding methods to disentangle architectural similarities of interactions networks for supervised and unsupervised posterior analytic tasks. The evaluation was carried out over a large number of simulated trophic networks representing variations around six ecological properties and size. We did not find an overall best method and instead showed that the performance of the methods depended on the targeted ecological properties and thus on the research questions. We also highlighted the importance of normalising the embedding for network sizes for meaningful posterior unsupervised analyses. We concluded by orientating potential users to the most suited methods given the question, the targeted network ecological property, and outlined links between those ecological properties and three ecological processes: robustness to extinction, community persistence and ecosystem functioning. We hope this study will stimulate the appropriation of graph embedding methods by ecologists.},
  copyright = {\copyright{} 2021 British Ecological Society},
  langid = {english},
  keywords = {dimension reduction,ecological interaction networks,evaluation,food webs,graph embedding,species interactions,trophic groups,trophic networks},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/P3KZ5UJ7/Botella et al. - 2022 - An appraisal of graph embeddings for comparing tro.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/4HN89Q49/2041-210X.html}
}

@article{braultCoclusteringLatentBloc2015,
  title = {{Co-clustering through Latent Bloc Model: a Review}},
  shorttitle = {{Co-clustering through Latent Bloc Model}},
  author = {Brault, Vincent and Mariadassou, Mahendra},
  year = 2015,
  journal = {Journal de la Soci{\'e}t{\'e} Fran{\c c}aise de Statistique},
  volume = {156},
  number = {3},
  pages = {120--139},
  issn = {2102-6238},
  urldate = {2024-05-15},
  langid = {french},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/ZPMQXEIE/Brault et Mariadassou - 2015 - Co-clustering through Latent Bloc Model a Review.pdf}
}

@misc{braultFastConsistentAlgorithm2023,
  title = {Fast and {{Consistent Algorithm}} for the {{Latent Block Model}}},
  author = {Brault, Vincent and Channarond, Antoine},
  year = 2023,
  month = mar,
  number = {arXiv:1610.09005},
  eprint = {1610.09005},
  primaryclass = {math},
  publisher = {arXiv},
  doi = {10.48550/arXiv.1610.09005},
  urldate = {2025-07-09},
  abstract = {The latent block model is used to simultaneously rank the rows and columns of a matrix to reveal a block structure. The algorithms used for estimation are often time consuming. However, recent work shows that the log-likelihood ratios are equivalent under the complete and observed (with unknown labels) models and the groups posterior distribution to converge as the size of the data increases to a Dirac mass located at the actual groups configuration. Based on these observations, the algorithm Largest Gaps is proposed in this paper to perform clustering using only the marginals of the matrix, when the number of blocks is very small with respect to the size of the whole matrix in the case of binary data. In addition, a model selection method is incorporated with a proof of its consistency. Thus, this paper shows that studying simplistic configurations (few blocks compared to the size of the matrix or very contrasting blocks) with complex algorithms is useless since the marginals already give very good parameter and classification estimates.},
  archiveprefix = {arXiv},
  langid = {english},
  keywords = {Mathematics - Statistics Theory,Statistics - Computation,Statistics - Statistics Theory},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-07-09T13:58:53.533Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/5LXC6Y68/Brault et Channarond - 2023 - Fast and Consistent Algorithm for the Latent Block Model.pdf}
}

@misc{braultGeneralisationLalgorithmeLargest,
  title = {{G\'en\'eralisation de l'algorithme Largest Gaps pour le mod\`ele des blocs latents non-param\'etrique}},
  author = {Brault, Vincent and Channarond, Antoine and Robert, Val{\'e}rie},
  abstract = {The latent block model assumes there exists a distribution for each crossing between an object cluster and a variable cluster of a data table ; the cells are supposed to be independent conditionally to the choice of these clusters. To estimate the model parameters, most of algorithms are time consuming. Brault and Channarond (2016) proposed to adapt the Largest Gaps algorithm which consists in using the margins. They thus obtained a procedure which estimates all the model parameters consistently but requires a large number of observations. In this talk, we will extend the procedure to the case of any distribution having a second order moment by using an EM algorithm estimation.},
  langid = {french},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-07-09T12:29:43.098Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/GIUNC4L3/Brault et al. - Généralisation de l'algorithme Largest Gaps pour le modèle des blocs latents non-paramétrique.pdf}
}

@misc{brehenyPenalizedLikelihood,
  title = {Penalized Likelihood},
  author = {Breheny, Patrick},
  langid = {english},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/BP7MKUJI/Breheny - Penalized likelihood.pdf}
}

@misc{bystrovaCausalDiscovery,
  title = {Causal Discovery},
  author = {Bystrova, Daria},
  langid = {english},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-07-02T09:34:39.476Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/NQE5DY92/Bystrova - Causal discovery.pdf}
}

@misc{carrascoUncoveringChallengesSolving2025,
  title = {Uncovering {{Challenges}} of {{Solving}} the {{Continuous Gromov-Wasserstein Problem}}},
  author = {Carrasco, Xavier Aramayo and Nekrashevich, Maksim and Mokrov, Petr and Burnaev, Evgeny and Korotin, Alexander},
  year = 2025,
  month = jun,
  number = {arXiv:2303.05978},
  eprint = {2303.05978},
  primaryclass = {cs},
  publisher = {arXiv},
  doi = {10.48550/arXiv.2303.05978},
  urldate = {2025-06-11},
  abstract = {Recently, the Gromov-Wasserstein Optimal Transport (GWOT) problem has attracted the special attention of the ML community. In this problem, given two distributions supported on two (possibly different) spaces, one has to find the most isometric map between them. In the discrete variant of GWOT, the task is to learn an assignment between given discrete sets of points. In the more advanced continuous formulation, one aims at recovering a parametric mapping between unknown continuous distributions based on i.i.d. samples derived from them. The clear geometrical intuition behind the GWOT makes it a natural choice for several practical use cases, giving rise to a number of proposed solvers. Some of them claim to solve the continuous version of the problem. At the same time, GWOT is notoriously hard, both theoretically and numerically. Moreover, all existing continuous GWOT solvers still heavily rely on discrete techniques. Natural questions arise: to what extent do existing methods unravel the GWOT problem, what difficulties do they encounter, and under which conditions they are successful? Our benchmark paper is an attempt to answer these questions. We specifically focus on the continuous GWOT as the most interesting and debatable setup. We crash-test existing continuous GWOT approaches on different scenarios, carefully record and analyze the obtained results, and identify issues. Our findings experimentally testify that the scientific community is still missing a reliable continuous GWOT solver, which necessitates further research efforts. As the first step in this direction, we propose a new continuous GWOT method which does not rely on discrete techniques and partially solves some of the problems of the competitors.},
  archiveprefix = {arXiv},
  keywords = {Computer Science - Machine Learning},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-06-11T15:49:10.770Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/DRW3LBZT/Carrasco et al. - 2025 - Uncovering Challenges of Solving the Continuous Gromov-Wasserstein Problem.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/XNIYH7BN/2303.html}
}

@article{cassolKeyFeaturesGuidelines2025,
  title = {Key Features and Guidelines for the Application of Microbial Alpha Diversity Metrics},
  author = {Cassol, Ignacio and Iba{\~n}ez, Mauro and Bustamante, Juan Pablo},
  year = 2025,
  month = jan,
  journal = {Scientific Reports},
  volume = {15},
  number = {1},
  pages = {622},
  publisher = {Nature Publishing Group},
  issn = {2045-2322},
  doi = {10.1038/s41598-024-77864-y},
  urldate = {2025-08-18},
  abstract = {Studies of microbial communities vary widely in terms of analysis methods. In this growing field, the wide variety of diversity measures and lack of consistency make it harder to compare different studies. Most existing alpha diversity metrics are inherited from other disciplines and their assumptions are not always directly meaningful or true for microbiome data. Many existing microbiome studies apply one or some alpha diversity metrics with no fundamentals but also an unclear results interpretation. This work focuses on a theoretical, empirical, and comparative analysis of 19 frequently and less-frequently used microbial alpha diversity metrics grouped into 4 proposed categories, including key features of every analyzed metric with their mathematical assumptions, to provide a deeper understanding of the existing metrics and a practical implementation guide for future studies. Key metrics that should be required in microbiome analysis include richness, phylogenetic diversity, entropy, dominance of a few microbes over others, and an estimate of unobserved microbes. Collectively, these metrics contribute to a comprehensive set of analyses characterizing samples, allowing the determination of key aspects that might be otherwise obscured by partial or biased information. These guidelines enable further detailed analysis by each author according to their specific interests and clinical trials. Several practical examples are provided to illustrate how these recommendations improve the quality and depth of information obtained, facilitating better interpretation when working with microbiome data. These guidelines can be applied to both existing and future research studies, enhancing the standardization, consistency, and robustness of the analyses conducted. This approach aims to improve the capture of biological diversity, leading to better interpretations and insights.},
  copyright = {2025 The Author(s)},
  langid = {english},
  keywords = {Biodiversity,Data processing,Microbiome,Standards},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-08-18T15:11:42.130Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/8HYIRL89/Cassol et al. - 2025 - Key features and guidelines for the application of microbial alpha diversity metrics.pdf}
}

@article{celisseConsistencyMaximumlikelihoodVariational2012,
  title = {Consistency of Maximum-Likelihood and Variational Estimators in the Stochastic Block Model},
  author = {Celisse, Alain and Daudin, Jean-Jacques and Pierre, Laurent},
  year = 2012,
  month = jan,
  journal = {Electronic Journal of Statistics},
  volume = {6},
  pages = {1847--1899},
  publisher = {{Institute of Mathematical Statistics and Bernoulli Society}},
  issn = {1935-7524},
  doi = {10.1214/12-EJS729},
  urldate = {2023-06-06},
  abstract = {The stochastic block model (SBM) is a probabilistic model designed to describe heterogeneous directed and undirected graphs. In this paper, we address the asymptotic inference in SBM by use of maximum-likelihood and variational approaches. The identifiability of SBM is proved while asymptotic properties of maximum-likelihood and variational estimators are derived. In particular, the consistency of these estimators is settled for the probability of an edge between two vertices (and for the group proportions at the price of an additional assumption), which is to the best of our knowledge the first result of this type for variational estimators in random graphs.},
  keywords = {62E17,62G05,62G20,62H30,Concentration inequalities,consistency,maximum likelihood estimators,Random graphs,Stochastic block model,variational estimators},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/42EBTTAX/celisse2012.pdf.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/JNWRIYKG/celisse2012.pdf.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/XG463B5I/Celisse et al. - 2012 - Consistency of maximum-likelihood and variational .pdf}
}

@misc{chabert-liddellLearningCommonStructures2023,
  author        = {{Chabert-Liddell}, Saint-Clair and Barbillon, Pierre and Donnet, Sophie},
  title         = {Learning Common Structures in a Collection of Networks. {{An}} Application to Food Webs},
  type          = {Article},
  year          = {2023},
  month         = mar,
  eprint        = {2206.00560},
  archiveprefix = {arXiv},
  primaryclass  = {stat},
  number        = {arXiv:2206.00560},
  publisher     = {arXiv},
  doi           = {10.48550/arXiv.2206.00560},
  urldate       = {2023-05-22},
  keywords      = {Statistics - Applications,Statistics - Methodology},
  abstract      = {Let a collection of networks represent interactions within several (social or ecological) systems. We pursue two objectives: identifying similarities in the topological structures that are held in common between the networks and clustering the collection into sub-collections of structurally homogeneous networks. We tackle these two questions with a probabilistic model based approach. We propose an extension of the Stochastic Block Model (SBM) adapted to the joint modeling of a collection of networks. The networks in the collection are assumed to be independent realizations of SBMs. The common connectivity structure is imposed through the equality of some parameters. The model parameters are estimated with a variational Expectation-Maximization (EM) algorithm. We derive an ad-hoc penalized likelihood criterion to select the number of blocks and to assess the adequacy of the consensus found between the structures of the different networks. This same criterion can also be used to cluster networks on the basis of their connectivity structure. It thus provides a partition of the collection into subsets of structurally homogeneous networks. The relevance of our proposition is assessed on two collections of ecological networks. First, an application to three stream food webs reveals the homogeneity of their structures and the correspondence between groups of species in different ecosystems playing equivalent ecological roles. Moreover, the joint analysis allows a finer analysis of the structure of smaller networks. Second, we cluster 67 food webs according to their connectivity structures and demonstrate that five mesoscale structures are sufficient to describe this collection.},
  file          = {/home/louis/snap/zotero-snap/common/Zotero/storage/M74TXGCF/Chabert-Liddell et al. - 2023 - Learning common structures in a collection of netw.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/A35M8KNP/2206.html},
}

@article{chabert-liddellLearningCommonStructures2024,
  author    = {{Chabert-Liddell}, Saint-Clair and Barbillon, Pierre and Donnet, Sophie},
  title     = {Learning Common Structures in a Collection of Networks. {{An}} Application to Food Webs},
  journal   = {The Annals of Applied Statistics},
  year      = {2024},
  month     = jun,
  volume    = {18},
  number    = {2},
  pages     = {1213--1235},
  publisher = {Institute of Mathematical Statistics},
  issn      = {1932-6157, 1941-7330},
  doi       = {10.1214/23-AOAS1831},
  urldate   = {2024-05-16},
  keywords  = {clustering,ecology,latent variable models,networks,Stochastic block model},
  abstract  = {Let a collection of networks represent interactions within several (social or ecological) systems. We pursue two objectives: identifying similarities in the topological structures that are held in common between the networks and clustering the collection into subcollections of structurally homogeneous networks. We tackle these two questions with a probabilistic model-based approach. We propose an extension of the stochastic block model (SBM) adapted to the joint modeling of a collection of networks. The networks in the collection are assumed to be independent realizations of SBMs. The common connectivity structure is imposed through the equality of some parameters. The model parameters are estimated with a variational expectation-maximization (EM) algorithm. We derive an ad hoc penalized likelihood criterion to select the number of blocks and to assess the adequacy of the consensus found between the structures of the different networks. This same criterion can also be used to cluster networks on the basis of their connectivity structure. It thus provides a partition of the collection into subsets of structurally homogeneous networks. The relevance of our proposition is assessed on two collections of ecological networks. First, an application to three stream food webs reveals the homogeneity of their structures and the correspondence between groups of species in different ecosystems playing equivalent ecological roles. Moreover, the joint analysis allows a finer analysis of the structure of smaller networks. Second, we cluster 67 food webs according to their connectivity structures and demonstrate that five mesoscale structures are sufficient to describe this collection.},
  file      = {/home/louis/snap/zotero-snap/common/Zotero/storage/4USKD3WW/Chabert-Liddell et al. - 2024 - Learning common structures in a collection of netw.pdf},
}

% NOTE(review): retyped from article (no journal, no year) to phdthesis; school and year were not in the exported record -- confirm against theses.fr before relying on them.
@phdthesis{chabert-liddellStatisticalLearningCollections,
  title = {Statistical Learning of Collections of Networks with Applications in Ecology and Sociology},
  author = {{Chabert-Liddell}, Saint-Clair},
  year = 2021,
  school = {Universit{\'e} Paris-Saclay},
  langid = {french},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-05-20T12:28:31.466Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/ZVDB6I7E/Bel - Saint-Clair CHABERT-LIDDELL.pdf}
}

@article{chabert-liddellStochasticBlockModel2021,
  title = {A {{Stochastic Block Model Approach}} for the {{Analysis}} of {{Multilevel Networks}}: An {{Application}} to the {{Sociology}} of {{Organizations}}},
  shorttitle = {A {{Stochastic Block Model Approach}} for the {{Analysis}} of {{Multilevel Networks}}},
  author = {{Chabert-Liddell}, Saint-Clair and Barbillon, Pierre and Donnet, Sophie and Lazega, Emmanuel},
  year = 2021,
  month = jun,
  journal = {Computational Statistics \& Data Analysis},
  volume = {158},
  eprint = {1910.10512},
  primaryclass = {stat},
  pages = {107179},
  issn = {0167-9473},
  doi = {10.1016/j.csda.2021.107179},
  urldate = {2025-09-26},
  abstract = {A multilevel network is defined as the junction of two interaction networks, one level representing the interactions between individuals and the other the interactions between organizations. The levels are linked by an affiliation relationship, each individual belonging to a unique organization. A new Stochastic Block Model is proposed as a unified probalistic framework tailored for multilevel networks. This model contains latent blocks accounting for heterogeneity in the patterns of connection within each level and introducing dependencies between the levels. The sought connection patterns are not specified a priori which makes this approach flexible. Variational methods are used for the model inference and an Integrated Classified Likelihood criterion is developed for choosing the number of blocks and also for deciding whether the two levels are dependent or not. A comprehensive simulation study exhibits the benefit of considering this approach, illustrates the robustness of the clustering and highlights the reliability of the criterion used for model selection. This approach is applied on a sociological dataset collected during a television program trade fair, the inter-organizational level being the economic network between companies and the inter-individual level being the informal network between their representatives. It brings a synthetic representation of the two networks unraveling their intertwined structure and confirms the coopetition at stake.},
  archiveprefix = {arXiv},
  keywords = {Computer Science - Social and Information Networks,Statistics - Applications,Statistics - Methodology},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-09-26T08:52:07.522Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/6XFLLTKL/Chabert-Liddell et al. - 2021 - A Stochastic Block Model Approach for the Analysis of Multilevel Networks an Application to the Soc.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/8DAMTVD8/1910.html}
}

@article{chacoffInteractionFrequencyNetwork2018,
  author     = {Chacoff, Natacha P. and Resasco, Julian and V{\'a}zquez, Diego P.},
  title      = {Interaction Frequency, Network Position, and the Temporal Persistence of Interactions in a Plant--Pollinator Network},
  journal    = {Ecology},
  year       = {2018},
  volume     = {99},
  number     = {1},
  pages      = {21--28},
  issn       = {1939-9170},
  doi        = {10.1002/ecy.2063},
  urldate    = {2026-06-10},
  langid     = {english},
  copyright  = {\copyright{} 2017 by the Ecological Society of America},
  keywords   = {/unread,interaction frequency,Monte Desert,nestedness,network core,network dynamics,null model,sampling artifacts,species abundance,temporal variability},
  abstract   = {Ecological interactions are highly dynamic in time and space. Previous studies of plant--animal mutualistic networks have shown that the occurrence of interactions varies substantially across years. We analyzed interannual variation of a quantitative mutualistic network, in which links are weighted by interaction frequency. The network was sampled over six consecutive years, representing one of the longest time series for a community-wide mutualistic network. We estimated the interannual similarity in interactions and assessed the determinants of their persistence. The occurrence of interactions varied greatly among years, with most interactions seen in only one year (64\%) and few (20\%) in more than two years. This variation was associated with the frequency and position of interactions relative to the network core, so that the network consisted of a persistent core of frequent interactions and many peripheral, infrequent interactions. Null model analyses suggest that species abundances play a substantial role in generating these patterns. Our study represents an important step in the study of ecological networks, furthering our mechanistic understanding of the ecological processes driving the temporal persistence of interactions.},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2026-06-10T15:30:17.010Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/33QVBXTA/Chacoff et al. - 2018 - Interaction frequency, network position, and the temporal persistence of interactions in a plant–pol.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/JV67XEWD/ecy.html},
}

% NOTE(review): retyped from article to misc because the record has no journal or year; replace with the proper type once the venue is known.
@misc{chaffronCommunityNetworkModels,
  title = {Community Network Models to Reveal Marine Plankton Systems Ecology and Evolution},
  author = {Chaffron, Samuel},
  langid = {english},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-11-28T12:53:30.140Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/ZPM4LQIN/Chaffron - Community network models to reveal marine plankton systems ecology and evolution.pdf}
}

@article{chaillouOriginEcologicalSelection2015,
  title = {Origin and Ecological Selection of Core and Food-Specific Bacterial Communities Associated with Meat and Seafood Spoilage},
  author = {Chaillou, St{\'e}phane and {Chaulot-Talmon}, Aur{\'e}lie and Caekebeke, H{\'e}l{\`e}ne and Cardinal, Mireille and Christieans, Souad and Denis, Catherine and Desmonts, Marie H{\'e}l{\`e}ne and Dousset, Xavier and Feurer, Carole and Hamon, Erwann and Joffraud, Jean-Jacques and La Carbona, St{\'e}phanie and Leroi, Fran{\c c}oise and Leroy, Sabine and Lorre, Sylvie and Mac{\'e}, Sabrina and Pilet, Marie-France and Pr{\'e}vost, Herv{\'e} and Rivollier, Marina and Roux, Dephine and Talon, R{\'e}gine and Zagorec, Monique and {Champomier-Verg{\`e}s}, Marie-Christine},
  year = 2015,
  month = may,
  journal = {The ISME Journal},
  volume = {9},
  number = {5},
  pages = {1105--1118},
  issn = {1751-7370},
  doi = {10.1038/ismej.2014.202},
  abstract = {The microbial spoilage of meat and seafood products with short shelf lives is responsible for a significant amount of food waste. Food spoilage is a very heterogeneous process, involving the growth of various, poorly characterized bacterial communities. In this study, we conducted 16S ribosomal RNA gene pyrosequencing on 160 samples of fresh and spoiled foods to comparatively explore the bacterial communities associated with four meat products and four seafood products that are among the most consumed food items in Europe. We show that fresh products are contaminated in part by a microbiota similar to that found on the skin and in the gut of animals. However, this animal-derived microbiota was less prevalent and less abundant than a core microbiota, psychrotrophic in nature, mainly originated from the environment (water reservoirs). We clearly show that this core community found on meat and seafood products is the main reservoir of spoilage bacteria. We also show that storage conditions exert strong selective pressure on the initial microbiota: alpha diversity in fresh samples was 189\textpm 58 operational taxonomic units (OTUs) but dropped to 27\textpm 12 OTUs in spoiled samples. The OTU assemblage associated with spoilage was shaped by low storage temperatures, packaging and the nutritional value of the food matrix itself. These factors presumably act in tandem without any hierarchical pattern. Most notably, we were also able to identify putative new clades of dominant, previously undescribed bacteria occurring on spoiled seafood, a finding that emphasizes the importance of using culture-independent methods when studying food microbiota.},
  langid = {english},
  pmcid = {PMC4409155},
  pmid = {25333463},
  keywords = {/unread,Animals,Bacteria,DNA Barcoding Taxonomic,Europe,Food Contamination,Food Microbiology,Meat,Microbiota,Polymerase Chain Reaction,RNA Ribosomal 16S,Seafood},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2026-06-05T13:01:22.433Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/M38NYW3E/Chaillou et al. - 2015 - Origin and ecological selection of core and food-specific bacterial communities associated with meat.pdf}
}

@article{channarondClassificationEstimationStochastic2012,
  title = {Classification and Estimation in the {{Stochastic Blockmodel}} Based on the Empirical Degrees},
  author = {Channarond, Antoine and Daudin, Jean-Jacques and Robin, St{\'e}phane},
  year = 2012,
  month = jan,
  journal = {Electronic Journal of Statistics},
  volume = {6},
  publisher = {Institute of Mathematical Statistics},
  issn = {1935-7524},
  doi = {10.1214/12-ejs753},
  urldate = {2025-07-09},
  abstract = {The Stochastic Blockmodel [16] is a mixture model for heterogeneous network data. Unlike the usual statistical framework, new nodes give additional information about the previous ones in this model. Thereby the distribution of the degrees concentrates in points conditionally on the node class. We show under a mild assumption that classification, estimation and model selection can actually be achieved with no more than the empirical degree data. We provide an algorithm able to process very large networks and consistent estimators based on it. In particular, we prove a bound of the probability of misclassification of at least one node, including when the number of classes grows.},
  langid = {english},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-07-09T13:59:33.921Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/8TL8AJ2G/Channarond et al. - 2012 - Classification and estimation in the Stochastic Blockmodel based on the empirical degrees.pdf}
}

@misc{chaussardTreebasedVariationalInference2025,
  title = {Tree-Based Variational Inference for {{Poisson}} Log-Normal Models},
  author = {Chaussard, Alexandre and Bonnet, Anna and Gassiat, Elisabeth and Le Corff, Sylvain},
  year = 2025,
  month = jun,
  number = {arXiv:2406.17361},
  eprint = {2406.17361},
  primaryclass = {stat},
  doi = {10.48550/arXiv.2406.17361},
  urldate = {2025-10-22},
  abstract = {When studying ecosystems, hierarchical trees are often used to organize entities based on proximity criteria, such as the taxonomy in microbiology, social classes in geography, or product types in retail businesses, offering valuable insights into entity relationships. Despite their significance, current count-data models do not leverage this structured information. In particular, the widely used Poisson log-normal (PLN) model, known for its ability to model interactions between entities from count data, lacks the possibility to incorporate such hierarchical tree structures, limiting its applicability in domains characterized by such complexities. To address this matter, we introduce the PLN-Tree model as an extension of the PLN model, specifically designed for modeling hierarchical count data. By integrating structured variational inference techniques, we propose an adapted training procedure and establish identifiability results, enhancing both theoretical foundations and practical interpretability. Experiments on synthetic datasets and human gut microbiome data highlight generative improvements when using PLN-Tree, demonstrating the practical interest of knowledge graphs like the taxonomy in microbiome modeling. Additionally, we present a proof-of-concept implication of the identifiability results by illustrating the practical benefits of using identifiable features for classification tasks, showcasing the versatility of the framework.},
  archiveprefix = {arXiv},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-10-22T15:07:42.419Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/PCCDFVLB/Chaussard et al. - 2025 - Tree-based variational inference for Poisson log-normal models.pdf}
}

@article{chenAssociatingMicrobiomeComposition2012,
  author     = {Chen, Jun and Bittinger, Kyle and Charlson, Emily S. and Hoffmann, Christian and Lewis, James and Wu, Gary D. and Collman, Ronald G. and Bushman, Frederic D. and Li, Hongzhe},
  title      = {Associating Microbiome Composition with Environmental Covariates Using Generalized {{UniFrac}} Distances},
  journal    = {Bioinformatics},
  year       = {2012},
  month      = aug,
  volume     = {28},
  number     = {16},
  pages      = {2106--2113},
  issn       = {1367-4811, 1367-4803},
  doi        = {10.1093/bioinformatics/bts342},
  urldate    = {2025-11-07},
  langid     = {english},
  copyright  = {http://creativecommons.org/licenses/by-nc/3.0},
  abstract   = {Motivation: The human microbiome plays an important role in human disease and health. Identification of factors that affect the microbiome composition can provide insights into disease mechanism as well as suggest ways to modulate the microbiome composition for therapeutical purposes. Distance-based statistical tests have been applied to test the association of microbiome composition with environmental or biological covariates. The unweighted and weighted UniFrac distances are the most widely used distance measures. However, these two measures assign too much weight either to rare lineages or to most abundant lineages, which can lead to loss of power when the important composition change occurs in moderately abundant lineages.},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-11-07T14:53:58.882Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/YNIF46ZM/Chen et al. - 2012 - Associating microbiome composition with environmental covariates using generalized UniFrac distances.pdf},
}

@misc{chiquetFastTreeInference2015,
  title = {Fast Tree Inference with Weighted Fusion Penalties},
  author = {Chiquet, Julien and Gutierrez, Pierre and Rigaill, Guillem},
  year = 2015,
  month = may,
  number = {arXiv:1407.5915},
  eprint = {1407.5915},
  primaryclass = {stat},
  publisher = {arXiv},
  doi = {10.48550/arXiv.1407.5915},
  urldate = {2026-01-07},
  abstract = {Given a data set with many features observed in a large number of conditions, it is desirable to fuse and aggregate conditions which are similar to ease the interpretation and extract the main characteristics of the data. This paper presents a multidimensional fusion penalty framework to address this question when the number of conditions is large. If the fusion penalty is encoded by an {$\ell_q$}-norm, we prove for uniform weights that the path of solutions is a tree which is suitable for interpretability. For the {$\ell_1$} and {$\ell_\infty$}-norms, the path is piecewise linear and we derive a homotopy algorithm to recover exactly the whole tree structure. For weighted {$\ell_1$}-fusion penalties, we demonstrate that distance-decreasing weights lead to balanced tree structures. For a subclass of these weights that we call ``exponentially adaptive'', we derive an O(n log(n)) homotopy algorithm and we prove an asymptotic oracle property. This guarantees that we recover the underlying structure of the data efficiently both from a statistical and a computational point of view. We provide a fast implementation of the homotopy algorithm for the single feature case, as well as an efficient embedded cross-validation procedure that takes advantage of the tree structure of the path of solutions. Our proposal outperforms its competing procedures on simulations both in terms of timings and prediction accuracy. As an example we consider phenotypic data: given one or several traits, we reconstruct a balanced tree structure and assess its agreement with the known taxonomy.},
  archiveprefix = {arXiv},
  langid = {english},
  keywords = {Statistics - Computation},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2026-01-07T14:25:09.306Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/GJHCV5IX/Chiquet et al. - 2015 - Fast tree inference with weighted fusion penalties.pdf}
}

@misc{chiquetSbmStochasticBlockmodels2024,
  title = {{{sbm}}: {{Stochastic Blockmodels}}},
  shorttitle = {{{sbm}}},
  author = {Chiquet, Julien and Donnet, Sophie and Barbillon, Pierre},
  year = 2024,
  month = sep,
  url = {https://CRAN.R-project.org/package=sbm},
  urldate = {2024-11-04},
  abstract = {A collection of tools and functions to adjust a variety of stochastic blockmodels (SBM). Supports at the moment Simple, Bipartite, 'Multipartite' and Multiplex SBM (undirected or directed with Bernoulli, Poisson or Gaussian emission laws on the edges, and possibly covariate for Simple and Bipartite SBM). See L\'eger (2016) {$<$}doi:10.48550/arXiv.1602.07587{$>$}, 'Barbillon et al.' (2020) {$<$}doi:10.1111/rssa.12193{$>$} and 'Bar-Hen et al.' (2020) {$<$}doi:10.48550/arXiv.1807.10138{$>$}.},
  copyright = {GPL ({$\geq$} 3)}
}

@article{clausetHierarchicalStructurePrediction2008,
  author     = {Clauset, Aaron and Moore, Cristopher and Newman, M. E. J.},
  title      = {Hierarchical Structure and the Prediction of Missing Links in Networks},
  journal    = {Nature},
  year       = {2008},
  month      = may,
  volume     = {453},
  number     = {7191},
  pages      = {98--101},
  publisher  = {Nature Publishing Group},
  issn       = {1476-4687},
  doi        = {10.1038/nature06830},
  urldate    = {2025-09-19},
  langid     = {english},
  copyright  = {2008 Springer Nature Limited},
  keywords   = {Humanities and Social Sciences,multidisciplinary,Science},
  abstract   = {Networks are now a ubiquitous tool for representing the structure of complex systems, including the Internet, social networks, food webs, and protein and genetic networks. Unfortunately, the data describing these networks are in many cases incomplete or biased. A new study provides a general technique to divide network vertices into groups and sub-groups. Revealing such underlying hierarchies makes it possible to predict missing links from partial data with higher accuracy than previous methods.},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-09-19T12:33:29.962Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/Y4FR2F3U/Clauset et al. - 2008 - Hierarchical structure and the prediction of missing links in networks.pdf},
}

@incollection{ClusteringLargeApplications1990,
  title = {Clustering {{Large Applications}} ({{Program CLARA}})},
  author = {Kaufman, Leonard and Rousseeuw, Peter J.},
  booktitle = {Finding {{Groups}} in {{Data}}},
  year = 1990,
  pages = {126--163},
  publisher = {John Wiley \& Sons, Ltd},
  doi = {10.1002/9780470316801.ch3},
  urldate = {2024-09-13},
  abstract = {The prelims comprise: Short Description of the Method How to Use the Program CLARA An Example More on the Algorithm and the Program Related Methods and References},
  chapter = {3},
  isbn = {978-0-470-31680-1},
  langid = {english},
  keywords = {clustering large applications,computation time,data sets,euclidean distance,interactive session},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/R28XFDII/1990 - Clustering Large Applications (Program CLARA).pdf}
}

@article{corsoConnectivityNestednessBipartite2011,
  title = {Connectivity and {{Nestedness}} in {{Bipartite Networks}} from {{Community Ecology}}},
  author = {Corso, Gilberto and De Araujo, A I Levartoski and De Almeida, Adriana M},
  year = 2011,
  month = mar,
  journal = {Journal of Physics: Conference Series},
  volume = {285},
  pages = {012009},
  issn = {1742-6596},
  doi = {10.1088/1742-6596/285/1/012009},
  urldate = {2024-11-05},
  abstract = {Bipartite networks and the nestedness concept appear in two different contexts in theoretical ecology: community ecology and islands biogeography. From a mathematical perspective nestedness is a pattern in a bipartite network. There are several nestedness indices in the market, we used the index {$\nu$}. The index {$\nu$} is found using the relation {$\nu$} = 1 - {$\tau$} where {$\tau$} is the temperature of the adjacency matrix of the bipartite network. By its turn {$\tau$} is defined with help of the Manhattan distance of the occupied elements of the adjacency matrix of the bipartite network. We prove that the nestedness index {$\nu$} is a function of the connectivities of the bipartite network. In addition we find a concise way to find {$\nu$} which avoid cumbersome algorithm manupulation of the adjacency matrix.},
  langid = {english},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/VJTV2ZT8/Corso et al. - 2011 - Connectivity and Nestedness in Bipartite Networks from Community Ecology.pdf}
}

@article{csilleryApproximateBayesianComputation2010a,
  title = {Approximate {{Bayesian Computation}} ({{ABC}}) in Practice},
  author = {Csill{\'e}ry, Katalin and Blum, Michael G. B. and Gaggiotti, Oscar E. and Fran{\c c}ois, Olivier},
  year = 2010,
  month = jul,
  journal = {Trends in Ecology \& Evolution},
  volume = {25},
  number = {7},
  pages = {410--418},
  issn = {0169-5347},
  doi = {10.1016/j.tree.2010.04.001},
  abstract = {Understanding the forces that influence natural variation within and among populations has been a major objective of evolutionary biologists for decades. Motivated by the growth in computational power and data complexity, modern approaches to this question make intensive use of simulation methods. Approximate Bayesian Computation (ABC) is one of these methods. Here we review the foundations of ABC, its recent algorithmic developments, and its applications in evolutionary biology and ecology. We argue that the use of ABC should incorporate all aspects of Bayesian data analysis: formulation, fitting, and improvement of a model. ABC can be a powerful tool to make inferences with complex models if these principles are carefully applied.},
  langid = {english},
  pmid = {20488578},
  keywords = {Africa,Algorithms,Animals,Bayes Theorem,Biodiversity,Biological Evolution,Biostatistics,Demography,Drosophila melanogaster,Models Genetic},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2026-04-07T13:22:11.099Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/6PFK56LT/Csilléry et al. - 2010 - Approximate Bayesian Computation (ABC) in practice.pdf}
}

@manual{csilleryApproximateBayesianComputationa,
  title = {Approximate {{Bayesian Computation}} ({{ABC}}) in {{R}}: {{A Vignette}}},
  author = {Csill{\'e}ry, Katalin and Lemaire, Louisiane and Fran{\c c}ois, Olivier and Blum, Michael G. B.},
  langid = {english},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2026-05-05T09:10:00.661Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/GD92EWSR/Csilléry et al. - Approximate Bayesian Computation (ABC) in R A Vignette.pdf}
}

@article{daigavaneUnderstandingConvolutionsGraphs2021,
  title = {Understanding {{Convolutions}} on {{Graphs}}},
  author = {Daigavane, Ameya and Ravindran, Balaraman and Aggarwal, Gaurav},
  year = 2021,
  month = sep,
  journal = {Distill},
  volume = {6},
  number = {9},
  pages = {e32},
  issn = {2476-0757},
  doi = {10.23915/distill.00032},
  urldate = {2024-05-21},
  abstract = {Understanding the building blocks and design choices of graph neural networks.},
  langid = {english},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/RBH4EXGY/understanding-gnns.html}
}

@article{daudinMixtureModelRandom2008,
  title = {A Mixture Model for Random Graphs},
  author = {Daudin, J.-J. and Picard, F. and Robin, S.},
  year = 2008,
  month = jun,
  journal = {Statistics and Computing},
  volume = {18},
  number = {2},
  pages = {173--183},
  issn = {1573-1375},
  doi = {10.1007/s11222-007-9046-7},
  urldate = {2023-06-16},
  abstract = {The Erd{\H o}s--R{\'e}nyi model of a network is simple and possesses many explicit expressions for average and asymptotic properties, but it does not fit well to real-world networks. The vertices of those networks are often structured in unknown classes (functionally related proteins or social communities) with different connectivity properties. The stochastic block structures model was proposed for this purpose in the context of social sciences, using a Bayesian approach. We consider the same model in a frequentist statistical framework. We give the degree distribution and the clustering coefficient associated with this model, a variational method to estimate its parameters and a model selection criterion to select the number of classes. This estimation procedure allows us to deal with large networks containing thousands of vertices. The method is used to uncover the modular structure of a network of enzymatic reactions.},
  langid = {english},
  keywords = {Mixture models,Random graphs,Variational method},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/439HK27B/Daudin et al. - 2008 - A mixture model for random graphs.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/FWPWMKUW/daudin2007.pdf.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/HVVF5MNY/daudin2007.pdf.pdf}
}

@article{daveziesAnalyticInferenceMultiway,
  title = {Analytic Inference with Multiway Clustering},
  author = {Davezies, Laurent and D'Haultf{\oe}uille, Xavier and Guyonvarch, Yannick},
  langid = {english},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/IV3VF3AT/Davezies et al. - Analytic inference with multiway clustering.pdf}
}

@misc{delonWassersteintypeDistanceSpace2020,
  title = {A {{Wasserstein-type}} Distance in the Space of {{Gaussian Mixture Models}}},
  author = {Delon, Julie and Desolneux, Agn{\`e}s},
  year = 2020,
  month = jun,
  number = {arXiv:1907.05254},
  eprint = {1907.05254},
  primaryclass = {math},
  publisher = {arXiv},
  urldate = {2024-06-06},
  abstract = {In this paper we introduce a Wasserstein-type distance on the set of Gaussian mixture models. This distance is defined by restricting the set of possible coupling measures in the optimal transport problem to Gaussian mixture models. We derive a very simple discrete formulation for this distance, which makes it suitable for high dimensional problems. We also study the corresponding multimarginal and barycenter formulations. We show some properties of this Wasserstein-type distance, and we illustrate its practical use with some examples in image processing.},
  archiveprefix = {arXiv},
  langid = {english},
  keywords = {Mathematics - Optimization and Control},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/UP2URTE2/Delon et Desolneux - 2020 - A Wasserstein-type distance in the space of Gaussi.pdf}
}

@article{demanincorDoesPhenologyExplain2020,
  title = {Does Phenology Explain Plant--Pollinator Interactions at Different Latitudes? {{An}} Assessment of Its Explanatory Power in Plant--Hoverfly Networks in {{French}} Calcareous Grasslands},
  shorttitle = {Does Phenology Explain Plant--Pollinator Interactions at Different Latitudes?},
  author = {{de Manincor}, Natasha and Hautek{\`e}ete, Nina and Piquot, Yves and Schatz, Bertrand and Vanappelghem, C{\'e}dric and Massol, Fran{\c c}ois},
  year = 2020,
  journal = {Oikos},
  volume = {129},
  number = {5},
  pages = {753--765},
  issn = {1600-0706},
  doi = {10.1111/oik.07259},
  urldate = {2026-06-10},
  abstract = {For plant--pollinator interactions to occur, the flowering of plants and the flying period of pollinators (i.e. their phenologies) have to overlap. Yet, few models make use of this principle to predict interactions and fewer still are able to compare interaction networks of different sizes. Here, we tackled both challenges using Bayesian structural equation models (SEM), incorporating the effect of phenological overlap in six plant--hoverfly networks. Insect and plant abundances were strong determinants of the number of visits, while phenology overlap alone was not sufficient, but significantly improved model fit. Phenology overlap was a stronger determinant of plant--pollinator interactions in sites where the average overlap was longer and network compartmentalization was weaker, i.e. at higher latitudes. Our approach highlights the advantages of using Bayesian SEMs to compare interaction networks of different sizes along environmental gradients and articulates the various steps needed to do so.},
  copyright = {\copyright{} 2020 Nordic Society Oikos. Published by John Wiley \& Sons Ltd},
  langid = {english},
  keywords = {/unread,Bayesian model,interaction probability,latent block model,latitudinal gradient,mutualistic network,phenology overlap,species abundance,structural equation model},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2026-06-10T15:43:45.787Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/74LCWZIQ/de Manincor et al. - 2020 - Does phenology explain plant–pollinator interactions at different latitudes An assessment of its ex.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/55RVRYKD/oik.html}
}

@article{dempsterMaximumLikelihoodIncomplete1977,
  title = {Maximum {{Likelihood}} from {{Incomplete Data}} via the {{EM Algorithm}}},
  author = {Dempster, A. P. and Laird, N. M. and Rubin, D. B.},
  year = 1977,
  journal = {Journal of the Royal Statistical Society. Series B (Methodological)},
  volume = {39},
  number = {1},
  eprint = {2984875},
  eprinttype = {jstor},
  pages = {1--38},
  publisher = {Royal Statistical Society, Oxford University Press},
  issn = {0035-9246},
  urldate = {2025-05-27},
  abstract = {A broadly applicable algorithm for computing maximum likelihood estimates from incomplete data is presented at various levels of generality. Theory showing the monotone behaviour of the likelihood and convergence of the algorithm is derived. Many examples are sketched, including missing value situations, applications to grouped, censored or truncated data, finite mixture models, variance component estimation, hyperparameter estimation, iteratively reweighted least squares and factor analysis.},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-05-27T16:20:41.925Z}
}

@misc{derrUsingNetworkDensity2024,
  title = {Using {{Network Density}} to {{Evaluate}} and {{Optimize Collaboration Intensity}}},
  author = {Derr, Alex},
  year = 2024,
  month = nov,
  howpublished = {Visible Network Labs},
  urldate = {2025-09-21},
  abstract = {Learn how to assess network density to optimize collaboration, prevent overload, and strengthen connectivity using network analysis for strategic insights.},
  langid = {american},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-09-23T11:03:32.617Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/WEZWX3H9/using-network-density-to-evaluate-and-optimize-collaboration-intensity.html}
}

@article{desjardins-proulxEcologicalInteractionsNetflix2017,
  title = {Ecological Interactions and the {{Netflix}} Problem},
  author = {{Desjardins-Proulx}, Philippe and Laigle, Idaline and Poisot, Timoth{\'e}e and Gravel, Dominique},
  year = 2017,
  month = aug,
  journal = {PeerJ},
  volume = {5},
  pages = {e3644},
  publisher = {PeerJ Inc.},
  issn = {2167-8359},
  doi = {10.7717/peerj.3644},
  urldate = {2023-06-15},
  abstract = {Species interactions are a key component of ecosystems but we generally have an incomplete picture of who-eats-who in a given community. Different techniques have been devised to predict species interactions using theoretical models or abundances. Here, we explore the K nearest neighbour approach, with a special emphasis on recommendation, along with a supervised machine learning technique. Recommenders are algorithms developed for companies like Netflix to predict whether a customer will like a product given the preferences of similar customers. These machine learning techniques are well-suited to study binary ecological interactions since they focus on positive-only data. By removing a prey from a predator, we find that recommenders can guess the missing prey around 50\% of the times on the first try, with up to 881 possibilities. Traits do not improve significantly the results for the K nearest neighbour, although a simple test with a supervised learning approach (random forests) show we can predict interactions with high accuracy using only three traits per species. This result shows that binary interactions can be predicted without regard to the ecological community given only three variables: body mass and two variables for the species' phylogeny. These techniques are complementary, as recommenders can predict interactions in the absence of traits, using only information about other species' interactions, while supervised learning algorithms such as random forests base their predictions on traits only but do not exploit other species' interactions. Further work should focus on developing custom similarity measures specialized for ecology to improve the KNN algorithms and using richer data to capture indirect relationships between species.},
  langid = {english},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/3L7JALP4/Desjardins-Proulx et al. - 2017 - Ecological interactions and the Netflix problem.pdf}
}

@article{desmetAdvantagesLimitationsCurrent2010,
  title = {Advantages and Limitations of Current Network Inference Methods},
  author = {De Smet, Riet and Marchal, Kathleen},
  year = 2010,
  month = oct,
  journal = {Nature Reviews Microbiology},
  volume = {8},
  number = {10},
  pages = {717--729},
  publisher = {Nature Publishing Group},
  issn = {1740-1534},
  doi = {10.1038/nrmicro2419},
  urldate = {2024-05-16},
  abstract = {Recently several novel tools for inferring transcriptional networks from expression data have been developed. Computationally inferred interactions offer a useful resource to complement experimental findings, but the direct integration of inference tools in daily laboratory practice remains limited, because the choice of the appropriate network tool is not obvious. Network inference is, mathematically, an underdetermined problem. The large number of theoretically possible interactions between transcription factors (TFs) and their targets far exceeds the number of independent measurements from which the true interactions can be inferred. Inference therefore results in many possible solutions that all explain the data equally well, but only a few of these solutions can be biologically true. Different state-of-the-art tools for network inference deal with underdetermination by using assumptions and simplifications that reduce the number of possible solutions in order to make the problem solvable. The strategy adopted to deal with the inference problem determines the aspects of the transcriptional network that is highlighted and the type of research question that can be answered. The outcome of network inference therefore varies greatly between tools. Fair benchmark studies are useful for guiding both users and developers. Most current studies combine validation based on an external standard with medium-throughput experiments to validate the extent to which known interactions can be recovered and reliable new interactions can be inferred. It is likely that no single best method exists, and different methods highlight complementary interaction types. 
Therefore, ensemble approaches, which aggregate the outcomes of several methods, offer a way to improve on the breadth and the accuracy of the predicted interactions. Future work in the light of novel data generation procedures will be to develop inference methods that exploit high-throughput information about regulation at levels other than transcription to mechanistically explain how genomic variations result in observed expression changes.},
  copyright = {2010 Springer Nature Limited},
  langid = {english},
  keywords = {Bacteria,Gene regulatory networks},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/DM5UT4LS/nrmicro2419.pdf.pdf}
}

@article{devotoUnderstandingPlanningEcological2012,
  title = {Understanding and Planning Ecological Restoration of Plant--Pollinator Networks},
  author = {Devoto, Mariano and Bailey, Sallie and Craze, Paul and Memmott, Jane},
  year = 2012,
  journal = {Ecology Letters},
  volume = {15},
  number = {4},
  pages = {319--328},
  issn = {1461-0248},
  doi = {10.1111/j.1461-0248.2012.01740.x},
  urldate = {2024-08-20},
  abstract = {Theory developed from studying changes in the structure and function of communities during natural or managed succession can guide the restoration of particular communities. We constructed 30 quantitative plant--flower visitor networks along a managed successional gradient to identify the main drivers of change in network structure. We then applied two alternative restoration strategies in silico (restoring for functional complementarity or redundancy) to data from our early successional plots to examine whether different strategies affected the restoration trajectories. Changes in network structure were explained by a combination of age, tree density and variation in tree diameter, even when variance explained by undergrowth structure was accounted for first. A combination of field data, a network approach and numerical simulations helped to identify which species should be given restoration priority in the context of different restoration targets. This combined approach provides a powerful tool for directing management decisions, particularly when management seeks to restore or conserve ecosystem function.},
  copyright = {\copyright{} 2012 Blackwell Publishing Ltd/CNRS},
  langid = {english},
  keywords = {Ecosystem function,functional complementarity,functional redundancy,pine forest,plant-animal interaction,plant-pollinator network,redundancy analysis,restoration,restoration strategy,succession},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/XY2INESI/Devoto et al. - 2012 - Understanding and planning ecological restoration of plant–pollinator networks.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/MWCIJ5TW/j.1461-0248.2012.01740.html}
}

@incollection{DivisiveAnalysisProgram1990,
  title = {Divisive {{Analysis}} ({{Program DIANA}})},
  author = {Kaufman, Leonard and Rousseeuw, Peter J.},
  booktitle = {Finding {{Groups}} in {{Data}}},
  year = 1990,
  pages = {253--279},
  publisher = {John Wiley \& Sons, Ltd},
  doi = {10.1002/9780470316801.ch6},
  urldate = {2024-09-13},
  abstract = {The prelims comprise: Short Description of the Method How to Use the Program DIANA Examples More on the Algorithm and the Program Related Methods and References},
  chapter = {6},
  isbn = {978-0-470-31680-1},
  langid = {english},
  keywords = {average dissimilarity,divisive analysis,divisive analysis algorithm,individual clusters,software packages},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/QPT7Z6J3/1990 - Divisive Analysis (Program DIANA).pdf}
}

@article{doreRelativeEffectsAnthropogenic2021,
  title = {Relative Effects of Anthropogenic Pressures, Climate, and Sampling Design on the Structure of Pollination Networks at the Global Scale},
  author = {Dor{\'e}, Ma{\"e}l and Fontaine, Colin and Th{\'e}bault, Elisa},
  year = 2021,
  journal = {Global Change Biology},
  volume = {27},
  number = {6},
  pages = {1266--1280},
  issn = {1365-2486},
  doi = {10.1111/gcb.15474},
  urldate = {2023-06-21},
  abstract = {Pollinators provide crucial ecosystem services that underpin to wild plant reproduction and yields of insect-pollinated crops. Understanding the relative impacts of anthropogenic pressures and climate on the structure of plant--pollinator interaction networks is vital considering ongoing global change and pollinator decline. Our ability to predict the consequences of global change for pollinator assemblages worldwide requires global syntheses, but these analytical approaches may be hindered by variable methods among studies that either invalidate comparisons or mask biological phenomena. Here we conducted a synthetic analysis that assesses the relative impact of anthropogenic pressures and climatic variability, and accounts for heterogeneity in sampling methodology to reveal network responses at the global scale. We analyzed an extensive dataset, comprising 295 networks over 123 locations all over the world, and reporting over 50,000 interactions between flowering plant species and their insect visitors. Our study revealed that anthropogenic pressures correlate with an increase in generalism in pollination networks while pollinator richness and taxonomic composition are more related to climatic variables with an increase in dipteran pollinator richness associated with cooler temperatures. The contrasting response of species richness and generalism of the plant--pollinator networks stresses the importance of considering interaction network structure alongside diversity in ecological monitoring. In addition, differences in sampling design explained more variation than anthropogenic pressures or climate on both pollination networks richness and generalism, highlighting the crucial need to report and incorporate sampling design in macroecological comparative studies of pollination networks. As a whole, our study reveals a potential human impact on pollination networks at a global scale. 
However, further research is needed to evaluate potential consequences of loss of specialist species and their unique ecological interactions and evolutionary pathways on the ecosystem pollination function at a global scale.},
  copyright = {\copyright{} 2020 John Wiley \& Sons Ltd},
  langid = {english},
  keywords = {anthropogenic pressures,climate,connectance,data,generalism,human impacts,plant-pollinator,pollination networks,richness,sampling effects,specialization},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/89ZXBJQP/10.1111@gcb.15474.pdf.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/IVR6RGG7/Doré et al. - 2021 - Relative effects of anthropogenic pressures, clima.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/DA3FRVPK/gcb.html;/home/louis/snap/zotero-snap/common/Zotero/storage/WSJ4DV98/gcb.html}
}

@article{dormannIndicesGraphsNull2009,
  title = {Indices, {{Graphs}} and {{Null Models}}: {{Analyzing Bipartite Ecological Networks}}},
  shorttitle = {Indices, {{Graphs}} and {{Null Models}}},
  author = {Dormann, Carsten F. and Fr{\"u}nd, Jochen and Bl{\"u}thgen, Nico and Gruber, Bernd},
  year = 2009,
  month = feb,
  journal = {The Open Ecology Journal},
  volume = {2},
  number = {1},
  pages = {7--24},
  issn = {1874-2130},
  doi = {10.2174/1874213000902010007},
  urldate = {2025-09-18},
  abstract = {Many analyses of ecological networks in recent years have introduced new indices to describe network properties. As a consequence, tens of indices are available to address similar questions, differing in specific detail, sensitivity in detecting the property in question, and robustness with respect to network size and sampling intensity. Furthermore, some indices merely reflect the number of species participating in a network, but not their interrelationship, requiring a null model approach. Here we introduce a new, free software calculating a large spectrum of network indices, visualizing bipartite networks and generating null models. We use this tool to explore the sensitivity of 26 network indices to network dimensions, sampling intensity and singleton observations. Based on observed data, we investigate the interrelationship of these indices, and show that they are highly correlated, and heavily influenced by network dimensions and connectance. Finally, we re-evaluate five common hypotheses about network properties, comparing 19 pollination networks with three differently complex null models: 1. The number of links per species (``degree'') follow (truncated) power law distributions. 2. Generalist pollinators interact with specialist plants, and vice versa (dependence asymmetry). 3. Ecological networks are nested. 4. Pollinators display complementarity, owing to specialization within the network. 5. Plant-pollinator networks are more robust to extinction than random networks. Our results indicate that while some hypotheses hold up against our null models, others are to a large extent understandable on the basis of network size, rather than ecological interrelationships. In particular, null model pattern of dependence asymmetry and robustness to extinction are opposite to what current network paradigms suggest. 
Our analysis, and the tools we provide, enables ecologists to readily contrast their findings with null model expectations for many different questions, thus separating statistical inevitability from ecological process.},
  langid = {english},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-09-18T13:47:50.536Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/IQWYN2GQ/Dormann et al. - 2009 - Indices, Graphs and Null Models Analyzing Bipartite Ecological Networks.pdf}
}

@article{elleUsePollinationNetworks2012,
  title = {The Use of Pollination Networks in Conservation},
  note = {This article is part of a Special Issue entitled ``Pollination biology research in Canada: Perspectives on a mutualism at different scales''},
  author = {Elle, Elizabeth and Elwell, Sherri L. and Gielens, Grahame A.},
  year = 2012,
  month = jul,
  journal = {Botany},
  volume = {90},
  number = {7},
  pages = {525--534},
  issn = {1916-2790, 1916-2804},
  doi = {10.1139/b11-111},
  urldate = {2025-09-18},
  abstract = {Recent concern about declines in pollinating insects highlights the need for better understanding of plant--pollinator interactions. One promising approach at the community scale is network analysis, which allows actual interactions to be assessed, unlike biodiversity surveys, which only identify the potentially interacting organisms. We highlight useful network properties for conservation research and examples of their use in the study of rare species, invasive species, responses of communities to climate change, and habitat loss and restoration. We suggest that nestedness, degree, and interaction strength asymmetry are the most useful network properties for applied research on plant--pollinator interactions, but also highlight practical concerns regarding their measurement. We encourage the adoption of a network approach when an understanding of function within communities, rather than simple community composition, is useful for management.},
  copyright = {http://www.nrcresearchpress.com/page/about/CorporateTextAndDataMining},
  langid = {english},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-09-18T15:39:13.968Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/EVUPKFLQ/Elle et al. - 2012 - The use of pollination networks in conservation1 This article is part of a Special Issue.pdf}
}

@article{erdosRandomGraphs1959,
  title = {On Random Graphs. {{I}}.},
  author = {Erd{\H o}s, P. and R{\'e}nyi, A.},
  year = 1959,
  journal = {Publicationes Mathematicae Debrecen},
  volume = {6},
  number = {3--4},
  pages = {290--297},
  issn = {0033-3883},
  doi = {10.5486/PMD.1959.6.3-4.12},
  urldate = {2024-08-09},
  abstract = {Semantic Scholar extracted view of "On random graphs. I." by P. Erdos et al.},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/WRSY3FZV/Erdős et Rényi - 2022 - On random graphs. I..pdf}
}

@article{faustOpenChallengesMicrobial2021a,
  title = {Open Challenges for Microbial Network Construction and Analysis},
  author = {Faust, Karoline},
  year = 2021,
  month = nov,
  journal = {The ISME Journal},
  volume = {15},
  number = {11},
  pages = {3111--3118},
  issn = {1751-7362},
  doi = {10.1038/s41396-021-01027-4},
  urldate = {2025-05-05},
  abstract = {Microbial network construction is a popular explorative data analysis technique in microbiome research. Although a large number of microbial network construction tools has been developed to date, there are several issues concerning the construction and interpretation of microbial networks that have received less attention. The purpose of this perspective is to draw attention to these underexplored challenges of microbial network construction and analysis.},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-05-05T07:37:03.250Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/RJEV2EG6/Faust - 2021 - Open challenges for microbial network construction and analysis.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/Z84BYDAD/7474352.html}
}

@article{fisogniSeasonalTrajectoriesPlantpollinator2022,
  title = {Seasonal Trajectories of Plant-Pollinator Interaction Networks Differ Following Phenological Mismatches along an Urbanization Gradient},
  author = {Fisogni, Alessandro and Hautek{\`e}ete, Nina and Piquot, Yves and Brun, Marion and Vanappelghem, C{\'e}dric and Ohlmann, Marc and Franchomme, Magalie and Hinnewinkel, Christelle and Massol, Fran{\c c}ois},
  year = 2022,
  month = oct,
  journal = {Landscape and Urban Planning},
  volume = {226},
  pages = {104512},
  issn = {0169-2046},
  doi = {10.1016/j.landurbplan.2022.104512},
  urldate = {2025-05-14},
  abstract = {Urbanization may significantly alter the abundance, composition and phenology of natural communities of plants and pollinators. However, how such alterations eventually affect the structure of plant-pollinator interaction networks is still poorly known. Here, we investigate how the structure of plant-pollinator networks changes along an urbanization gradient, which coincides with a phenological mismatch between plants and pollinators. We examined changes in plant-pollinator network structure in 12 sites sown with standardized native flower mixes along an urbanization gradient in a metropolis in Northern France. We used network-level metrics in combination with more detailed methodologies to identify changes in network structure, species clustering, and species roles through urban classes and time. We also evaluated the temporal trajectories of {$\alpha$}- and {$\beta$}-diversity of species and interactions along the gradient. Network-level metrics showed limited spatial--temporal variability in the connectance, distribution of interactions and network-level specialization. Finer-scale analyses showed that generalist plant and pollinator species with long phenology were the most central and played key roles in defining the composition of cohesive groups of interacting species in all networks. Network motifs and species positions showed higher temporal variability in less urbanized areas, and interactions were more dissimilar between urbanization classes earlier in the season. We showed evidence of alterations in plant-pollinator network structure across space and time along an urbanization gradient, likely driven by the significant advancement in flowering phenology observed in the more urbanized areas. Our results emphasize the importance of targeted measures to maintain functional plant-pollinator communities, especially early in the season in highly urbanized areas.},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-05-14T20:18:00.025Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/CCJWEIBD/Fisogni et al. - 2022 - Seasonal trajectories of plant-pollinator interaction networks differ following phenological mismatc.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/HMUM8AMZ/S016920462200161X.html}
}

@misc{flamaryPOTPythonOptimal2025,
  title = {{{POT Python Optimal Transport}}},
  author = {Flamary, R{\'e}mi and {Vincent-Cuaz}, C{\'e}dric and Courty, Nicolas and Gramfort, Alexandre and Kachaiev, Oleksii and Quang Tran, Huy and David, Laur{\`e}ne and Bonet, Cl{\'e}ment and Cassereau, Nathan and Gnassounou, Th{\'e}o and Tanguy, Eloi and Delon, Julie and Collas, Antoine and Mazelet, Sonia and Chapel, Laetitia and Kerdoncuff, Tanguy and Yu, Xizheng and Feickert, Matthew and Krzakala, Paul and Liu, Tianlin and Fernandes Montesuma, Eduardo},
  year = 2025,
  month = jan,
  url = {https://github.com/PythonOT/POT},
  urldate = {2025-01-28},
  abstract = {POT : Python Optimal Transport},
  copyright = {MIT}
}

@article{fortes-limaComplexGeneticAdmixture2021a,
  title = {Complex Genetic Admixture Histories Reconstructed with {{Approximate Bayesian Computation}}},
  author = {{Fortes-Lima}, Cesar A. and Laurent, Romain and Thouzeau, Valentin and Toupance, Bruno and Verdu, Paul},
  year = 2021,
  month = may,
  journal = {Molecular Ecology Resources},
  volume = {21},
  number = {4},
  pages = {1098--1117},
  issn = {1755-0998},
  doi = {10.1111/1755-0998.13325},
  abstract = {Admixture is a fundamental evolutionary process that has influenced genetic patterns in numerous species. Maximum-likelihood approaches based on allele frequencies and linkage-disequilibrium have been extensively used to infer admixture processes from genome-wide data sets, mostly in human populations. Nevertheless, complex admixture histories, beyond one or two pulses of admixture, remain methodologically challenging to reconstruct. We developed an Approximate Bayesian Computation (ABC) framework to reconstruct highly complex admixture histories from independent genetic markers. We built the software package MetHis to simulate independent SNPs or microsatellites in a two-way admixed population for scenarios with multiple admixture pulses, monotonically decreasing or increasing recurring admixture, or combinations of these scenarios. MetHis allows users to draw model-parameter values from prior distributions set by the user, and, for each simulation, MetHis can calculate numerous summary statistics describing genetic diversity patterns and moments of the distribution of individual admixture fractions. We coupled MetHis with existing machine-learning ABC algorithms and investigated the admixture history of admixed populations. Results showed that random forest ABC scenario-choice could accurately distinguish among most complex admixture scenarios, and errors were mainly found in regions of the parameter space where scenarios were highly nested, and, thus, biologically similar. We focused on African American and Barbadian populations as two study-cases. We found that neural network ABC posterior parameter estimation was accurate and reasonably conservative under complex admixture scenarios. For both admixed populations, we found that monotonically decreasing contributions over time, from Europe and Africa, explained the observed data more accurately than multiple admixture pulses. 
This approach will allow for reconstructing detailed admixture histories when maximum-likelihood methods are intractable.},
  langid = {english},
  pmcid = {PMC8247995},
  pmid = {33452723},
  keywords = {admixture,Africa,Algorithms,Approximate Bayesian Computation,Barbados,Bayes Theorem,Black or African American,Computational Biology,Computer Simulation,Europe,Genetic Variation,Genetics Population,Humans,inference,Likelihood Functions,Machine Learning,machine-learning,Microsatellite Repeats,Models Genetic,Polymorphism Single Nucleotide,population genetics,Software},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2026-04-07T13:23:36.649Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/C4E9WA2A/Fortes-Lima et al. - 2021 - Complex genetic admixture histories reconstructed with Approximate Bayesian Computation.pdf}
}

@article{fosdickMultiresolutionNetworkModels2019,
  author     = {Fosdick, Bailey K. and McCormick, Tyler H. and Murphy, Thomas Brendan and Ng, Tin Lok James and Westling, Ted},
  title      = {Multiresolution {{Network Models}}},
  journal    = {Journal of Computational and Graphical Statistics},
  year       = 2019,
  month      = jan,
  volume     = {28},
  number     = {1},
  pages      = {185--196},
  issn       = {1061-8600, 1537-2715},
  doi        = {10.1080/10618600.2018.1505633},
  urldate    = {2026-01-23},
  langid     = {english},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2026-01-23T12:38:22.053Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/9U72B7ER/Fosdick et al. - 2019 - Multiresolution Network Models.pdf}
}

@article{friedmanInferringCorrelationNetworks2012,
  title = {Inferring {{Correlation Networks}} from {{Genomic Survey Data}}},
  author = {Friedman, Jonathan and Alm, Eric J.},
  editor = {von Mering, Christian},
  year = 2012,
  month = sep,
  journal = {PLOS Computational Biology},
  volume = {8},
  number = {9},
  pages = {e1002687},
  issn = {1553-7358},
  doi = {10.1371/journal.pcbi.1002687},
  urldate = {2025-10-06},
  langid = {english},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-10-06T15:06:43.620Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/R2U7UX9B/Friedman et Alm - 2012 - Inferring Correlation Networks from Genomic Survey Data.pdf}
}

@incollection{Frontmatter1990,
  title = {Frontmatter},
  booktitle = {Finding {{Groups}} in {{Data}}},
  author = {Kaufman, Leonard and Rousseeuw, Peter J.},
  year = 1990,
  pages = {i--xiv},
  publisher = {John Wiley \& Sons, Ltd},
  doi = {10.1002/9780470316801.fmatter},
  urldate = {2024-09-13},
  abstract = {The prelims comprise: Half Title Title Copyright Preface Contents},
  isbn = {978-0-470-31680-1},
  langid = {english},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/FMSENR3S/1990 - Frontmatter.pdf}
}

@article{funkeStochasticBlockModels2019,
  author     = {Funke, Thorben and Becker, Till},
  title      = {Stochastic Block Models: {{A}} Comparison of Variants and Inference Methods},
  shorttitle = {Stochastic Block Models},
  journal    = {PLOS ONE},
  year       = 2019,
  month      = apr,
  volume     = {14},
  number     = {4},
  pages      = {e0215296},
  publisher  = {Public Library of Science},
  issn       = {1932-6203},
  doi        = {10.1371/journal.pone.0215296},
  urldate    = {2025-01-26},
  langid     = {english},
  keywords   = {a lire,Algorithms,Community structure,Computer networks,Graphs,Hierarchical clustering,Metadata,Probability distribution,Simulated annealing},
  abstract   = {Finding communities in complex networks is a challenging task and one promising approach is the Stochastic Block Model (SBM). But the influences from various fields led to a diversity of variants and inference methods. Therefore, a comparison of the existing techniques and an independent analysis of their capabilities and weaknesses is needed. As a first step, we review the development of different SBM variants such as the degree-corrected SBM of Karrer and Newman or Peixoto's hierarchical SBM. Beside stating all these variants in a uniform notation, we show the reasons for their development. Knowing the variants, we discuss a variety of approaches to infer the optimal partition like the Metropolis-Hastings algorithm. We perform our analysis based on our extension of the Girvan-Newman test and the Lancichinetti-Fortunato-Radicchi benchmark as well as a selection of some real world networks. Using these results, we give some guidance to the challenging task of selecting an inference method and SBM variant. In addition, we give a simple heuristic to determine the number of steps for the Metropolis-Hastings algorithms that lack a usual stop criterion. With our comparison, we hope to guide researches in the field of SBM and highlight the problem of existing techniques to focus future research. Finally, by making our code freely available, we want to promote a faster development, integration and exchange of new ideas.},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/C8IN9UTG/Funke et Becker - 2019 - Stochastic block models A comparison of variants and inference methods.pdf}
}

@incollection{FuzzyAnalysisProgram1990,
  title = {Fuzzy {{Analysis}} ({{Program FANNY}})},
  booktitle = {Finding {{Groups}} in {{Data}}},
  author = {Kaufman, Leonard and Rousseeuw, Peter J.},
  year = 1990,
  pages = {164--198},
  publisher = {John Wiley \& Sons, Ltd},
  doi = {10.1002/9780470316801.ch4},
  urldate = {2024-09-13},
  abstract = {The prelims comprise: The Purpose of Fuzzy Clustering How to Use the Program FANNY Examples More on the Algorithm and the Program Related Methods and References},
  chapter = {4},
  isbn = {978-0-470-31680-1},
  langid = {english},
  keywords = {data set,fuzzy analysis,interactive session,membership coefficients,silhouette width},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/8HMFB8MC/1990 - Fuzzy Analysis (Program FANNY).pdf}
}

@article{gallagherSpectralEmbeddingWeighted2024,
  author     = {Gallagher, Ian and Jones, Andrew and Bertiger, Anna and Priebe, Carey E. and {Rubin-Delanchy}, Patrick},
  title      = {Spectral {{Embedding}} of {{Weighted Graphs}}},
  journal    = {Journal of the American Statistical Association},
  year       = 2024,
  month      = jul,
  volume     = {119},
  number     = {547},
  pages      = {1923--1932},
  publisher  = {Taylor \& Francis},
  issn       = {0162-1459},
  doi        = {10.1080/01621459.2023.2225239},
  urldate    = {2026-01-12},
  keywords   = {Chernoff information,Gaussian mixture model,Matrix factorization,Network,Stochastic block model},
  abstract   = {When analyzing weighted networks using spectral embedding, a judicious transformation of the edge weights may produce better results. To formalize this idea, we consider the asymptotic behavior of spectral embedding for different edge-weight representations, under a generic low rank model. We measure the quality of different embeddings---which can be on entirely different scales---by how easy it is to distinguish communities, in an information-theoretical sense. For common types of weighted graphs, such as count networks or p-value networks, we find that transformations such as tempering or thresholding can be highly beneficial, both in theory and in practice. Supplementary materials for this article are available online.},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2026-01-12T15:12:12.952Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/WXLDF9VD/Gallagher et al. - 2024 - Spectral Embedding of Weighted Graphs.pdf}
}

@article{gantzHighlyEfficientCas9mediated2015,
  title = {Highly Efficient {{Cas9-mediated}} Gene Drive for Population Modification of the Malaria Vector Mosquito {{Anopheles}} Stephensi},
  author = {Gantz, Valentino M. and Jasinskiene, Nijole and Tatarenkova, Olga and Fazekas, Aniko and Macias, Vanessa M. and Bier, Ethan and James, Anthony A.},
  year = 2015,
  month = dec,
  journal = {Proceedings of the National Academy of Sciences},
  volume = {112},
  number = {49},
  pages = {E6736--E6743},
  publisher = {Proceedings of the National Academy of Sciences},
  doi = {10.1073/pnas.1521077112},
  urldate = {2024-09-04},
  abstract = {Genetic engineering technologies can be used both to create transgenic mosquitoes carrying antipathogen effector genes targeting human malaria parasites and to generate gene-drive systems capable of introgressing the genes throughout wild vector populations. We developed a highly effective autonomous Clustered Regularly Interspaced Short Palindromic Repeats (CRISPR)-associated protein 9 (Cas9)-mediated gene-drive system in the Asian malaria vector Anopheles stephensi, adapted from the mutagenic chain reaction (MCR). This specific system results in progeny of males and females derived from transgenic males exhibiting a high frequency of germ-line gene conversion consistent with homology-directed repair (HDR). This system copies an {$\sim$}17-kb construct from its site of insertion to its homologous chromosome in a faithful, site-specific manner. Dual anti-Plasmodium falciparum effector genes, a marker gene, and the autonomous gene-drive components are introgressed into {$\sim$}99.5\% of the progeny following outcrosses of transgenic lines to wild-type mosquitoes. The effector genes remain transcriptionally inducible upon blood feeding. In contrast to the efficient conversion in individuals expressing Cas9 only in the germ line, males and females derived from transgenic females, which are expected to have drive component molecules in the egg, produce progeny with a high frequency of mutations in the targeted genome sequence, resulting in near-Mendelian inheritance ratios of the transgene. Such mutant alleles result presumably from nonhomologous end-joining (NHEJ) events before the segregation of somatic and germ-line lineages early in development. These data support the design of this system to be active strictly within the germ line. Strains based on this technology could sustain control and elimination as part of the malaria eradication agenda.},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/VZYJSG6X/Gantz et al. - 2015 - Highly efficient Cas9-mediated gene drive for population modification of the malaria vector mosquito.pdf}
}

@misc{garcia-callejasEcologicalNetworksInteraction2024,
  title = {Ecological Networks across Interaction Types Are Modular and Highly Driven by Sampling Intensity at Biogeographical Scales},
  author = {{Garcia-Callejas}, David and Thebault, Elisa and Lajaaiti, Ismael and Martins, Lucas P. and Laux, Louise and Kefi, Sonia},
  year = 2024,
  month = dec,
  publisher = {bioRxiv},
  doi = {10.1101/2024.12.04.626839},
  urldate = {2024-12-10},
  abstract = {Understanding how the structure of ecological communities varies across biotic and abiotic dimensions is a fundamental goal in ecology. This challenge is now approachable due to the increasing availability of data on community structure across the globe. Ecological communities are often defined with respect to the guilds considered and the interactions they engage in, but it is unclear whether interactions of different types respond similarly to large-scale environmental gradients. Therefore, we don't know whether there exist differences in how the emergent structure of ecological networks varies across biogeographical gradients, depending on their constituent interaction types. Here, using a unique dataset of 952 networks across the globe, we provide a first comparison of network structural metrics and their large-scale variability for five overarching interaction types (feeding, frugivory, herbivory, parasitism, pollination). We show that networks of different types tend to be more modular than expected, but other structural metrics do not deviate from what is expected given the degree distributions of the networks. Our analysis also reveals that network sampling intensity is a particularly relevant factor influencing network degree distribution, and that food webs appear in general more sensitive to environmental factors than other interaction types. By analysing common descriptors from the degree distributions of ecological networks, this study underscores for the first time generalities and differences across different interaction types and their response to environmental, sampling, and anthropic factors.},
  archiveprefix = {bioRxiv},
  copyright = {http://creativecommons.org/licenses/by-nc-nd/4.0/},
  langid = {english},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/D24FQDJ2/Garcia-Callejas et al. - 2024 - Ecological networks across interaction types are modular and highly driven by sampling intensity at.pdf}
}

@article{gibsonSamplingMethodInfluences2011,
  author    = {Gibson, Rachel H. and Knott, Ben and Eberlein, Tim and Memmott, Jane},
  title     = {Sampling Method Influences the Structure of Plant--Pollinator Networks},
  journal   = {Oikos},
  year      = 2011,
  volume    = {120},
  number    = {6},
  pages     = {822--831},
  issn      = {1600-0706},
  doi       = {10.1111/j.1600-0706.2010.18927.x},
  urldate   = {2025-03-24},
  copyright = {\copyright{} 2011 The Authors},
  langid    = {english},
  abstract  = {The search for general properties in the structure of ecological networks is currently a very active area of research. Meta-analyses of published networks are a widely used technique. To have the best chance of discovering common properties though, networks should be constructed using a standardized approach. However, this is rarely the case, and pollination networks are constructed using two main methods: transects and timed observations. To investigate the potential for variation in network structure arising from different construction techniques we constructed plant--pollinator networks using two different methods at a single site, repeating our protocol over three field seasons. Transects and timed observation methods differ in the evenness of observation effort allocated among plant species in the observed community. We show that the uneven allocation of observation effort significantly affects the number of unique interactions in the network, and we reveal a strong trend in effects on web asymmetry and evenness of marginal abundance distributions. However, these effects do not appear to extend to the higher-order properties of connectance and nestedness.},
  file      = {/home/louis/snap/zotero-snap/common/Zotero/storage/BI4T5E29/Gibson et al. - 2011 - Sampling method influences the structure of plant–pollinator networks.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/Q4RH8QGE/j.1600-0706.2010.18927.html}
}

@inproceedings{gilmerNeuralMessagePassing2017,
  author    = {Gilmer, Justin and Schoenholz, Samuel S. and Riley, Patrick F. and Vinyals, Oriol and Dahl, George E.},
  title     = {Neural {{Message Passing}} for {{Quantum Chemistry}}},
  booktitle = {Proceedings of the 34th {{International Conference}} on {{Machine Learning}}},
  year      = 2017,
  month     = jul,
  pages     = {1263--1272},
  publisher = {PMLR},
  issn      = {2640-3498},
  urldate   = {2024-05-15},
  langid    = {english},
  abstract  = {Supervised learning on molecules has incredible potential to be useful in chemistry, drug discovery, and materials science. Luckily, several promising and closely related neural network models invariant to molecular symmetries have already been described in the literature. These models learn a message passing algorithm and aggregation procedure to compute a function of their entire input graph. At this point, the next step is to find a particularly effective variant of this general approach and apply it to chemical prediction benchmarks until we either solve them or reach the limits of the approach. In this paper, we reformulate existing models into a single common framework we call Message Passing Neural Networks (MPNNs) and explore additional novel variations within this framework. Using MPNNs we demonstrate state of the art results on an important molecular property prediction benchmark; these results are strong enough that we believe future work should focus on datasets with larger molecules or more accurate ground truth labels.},
  file      = {/home/louis/snap/zotero-snap/common/Zotero/storage/B45XI65B/Gilmer et al. - 2017 - Neural Message Passing for Quantum Chemistry.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/JNAIYUKE/Gilmer et al. - 2017 - Neural Message Passing for Quantum Chemistry.pdf}
}

@misc{glasscockWhatGraphon2016,
  author        = {Glasscock, Daniel},
  title         = {What Is a Graphon?},
  year          = 2016,
  month         = nov,
  number        = {arXiv:1611.00718},
  publisher     = {arXiv},
  eprint        = {1611.00718},
  archiveprefix = {arXiv},
  primaryclass  = {math},
  urldate       = {2024-10-28},
  langid        = {english},
  keywords      = {Mathematics - Combinatorics},
  abstract      = {Graphons, short for graph functions, are limiting objects for sequences of large, finite graphs with respect to the so-called cut metric. In this expository piece, we define graphons, motivate them, and discuss how they complete the space of finite graphs. We conclude by stating three theorems that connect the finite world of graphs with the continuous world of graphons.},
  file          = {/home/louis/snap/zotero-snap/common/Zotero/storage/M7WSR5JU/Glasscock - 2016 - What is a graphon.pdf}
}

@article{gonzalezSolvingInverseProblems2022,
  title = {Solving {{Inverse Problems}} by {{Joint Posterior Maximization}} with {{Autoencoding Prior}}},
  author = {Gonz{\'a}lez, Mario and Almansa, Andr{\'e}s and Tan, Pauline},
  year = 2022,
  month = jun,
  journal = {SIAM Journal on Imaging Sciences},
  volume = {15},
  number = {2},
  eprint = {2103.01648},
  primaryclass = {stat.ML},
  pages = {822--859},
  issn = {1936-4954},
  doi = {10.1137/21M140225X},
  urldate = {2026-05-28},
  abstract = {In this work we address the problem of solving ill-posed inverse problems in imaging where the prior is a variational autoencoder (VAE). Specifically we consider the decoupled case where the prior is trained once and can be reused for many different log-concave degradation models without retraining. Whereas previous MAP-based approaches to this problem lead to highly non-convex optimization algorithms, our approach computes the joint (space-latent) MAP that naturally leads to alternate optimization algorithms and to the use of a stochastic encoder to accelerate computations. The resulting technique (JPMAP) performs Joint Posterior Maximization using an Autoencoding Prior. We show theoretical and experimental evidence that the proposed objective function is quite close to bi-convex. Indeed it satisfies a weak bi-convexity property which is sufficient to guarantee that our optimization scheme converges to a stationary point. We also highlight the importance of correctly training the VAE using a denoising criterion, in order to ensure that the encoder generalizes well to out-of-distribution images, without affecting the quality of the generative model. This simple modification is key to providing robustness to the whole procedure. Finally we show how our joint MAP methodology relates to more common MAP approaches, and we propose a continuation scheme that makes use of our JPMAP algorithm to provide more robust MAP estimates. Experimental results also show the higher quality of the solutions obtained by our JPMAP approach with respect to other non-convex MAP approaches which more often get stuck in spurious local optima.},
  archiveprefix = {arXiv},
  keywords = {/unread,Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning,Electrical Engineering and Systems Science - Image and Video Processing,Mathematics - Optimization and Control,Statistics - Machine Learning},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2026-05-28T13:00:10.375Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/FKP6L6D5/González et al. - 2022 - Solving Inverse Problems by Joint Posterior Maximization with Autoencoding Prior.pdf}
}

@article{govaertBlockClusteringBernoulli2008,
  author     = {Govaert, G{\'e}rard and Nadif, Mohamed},
  title      = {Block Clustering with {{Bernoulli}} Mixture Models: {{Comparison}} of Different Approaches},
  shorttitle = {Block Clustering with {{Bernoulli}} Mixture Models},
  journal    = {Computational Statistics \& Data Analysis},
  year       = 2008,
  month      = feb,
  volume     = {52},
  number     = {6},
  pages      = {3233--3245},
  issn       = {0167-9473},
  doi        = {10.1016/j.csda.2007.09.007},
  urldate    = {2024-11-18},
  keywords   = {Block mixture model,Co-clustering,EM algorithm,Latent block model,Simultaneous clustering},
  abstract   = {The block or simultaneous clustering problem on a set of objects and a set of variables is embedded in the mixture model. Two algorithms have been developed: block EM as part of the maximum likelihood and fuzzy approaches, and block CEM as part of the classification maximum likelihood approach. A unified framework for obtaining different variants of block EM is proposed. These variants are studied and their performances evaluated in comparison with block CEM, two-way EM and two-way CEM, i.e EM and CEM applied separately to the two sets.},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/JF64S2R5/S0167947307003441.html;/home/louis/snap/zotero-snap/common/Zotero/storage/K8WR4ETZ/S0167947307003441.html}
}

@article{govaertClusteringBlockMixture2003,
  author   = {Govaert, G{\'e}rard and Nadif, Mohamed},
  title    = {Clustering with Block Mixture Models},
  journal  = {Pattern Recognition},
  series   = {Biometrics},
  year     = 2003,
  month    = feb,
  volume   = {36},
  number   = {2},
  pages    = {463--473},
  issn     = {0031-3203},
  doi      = {10.1016/S0031-3203(02)00074-2},
  urldate  = {2024-11-04},
  keywords = {Block CEM algorithm,Block mixture model,Clustering,EM algorithm,Latent block model,Mixture model},
  abstract = {Basing cluster analysis on mixture models has become a classical and powerful approach. Until now, this approach, which allows to explain some classic clustering criteria such as the well-known k-means criteria and to propose general criteria, has been developed to classify a set of objects measured on a set of variables. But, for this kind of data, if most clustering procedures are designated to construct an optimal partition of objects or, sometimes, of variables, there exist others methods, named block clustering methods, which consider simultaneously the two sets and organize the data into homogeneous blocks. In this work, a new mixture model called block mixture model is proposed to take into account this situation. This model allows to embed simultaneous clustering of objects and variables in a mixture approach. We first consider this probabilistic model in a general context and we develop a new algorithm of simultaneous partitioning based on the CEM algorithm. Then, we focus on the case of binary data and we show that our approach allows us to extend a block clustering method, which had been proposed in this case. Simplicity, fast convergence and the possibility to process large data sets are the major advantages of the proposed approach.},
  file     = {/home/louis/snap/zotero-snap/common/Zotero/storage/9IAIVE73/Govaert et Nadif - 2003 - Clustering with block mixture models.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/ATXZ6AV7/S0031320302000742.html;/home/louis/snap/zotero-snap/common/Zotero/storage/HYHS4ZRY/S0031320302000742.html}
}

@article{govaertEMAlgorithmBlock2005,
  title = {An {{EM}} Algorithm for the Block Mixture Model},
  author = {Govaert, G{\'e}rard and Nadif, Mohamed},
  year = 2005,
  month = apr,
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume = {27},
  number = {4},
  pages = {643--647},
  issn = {1939-3539},
  doi = {10.1109/TPAMI.2005.69},
  abstract = {Although many clustering procedures aim to construct an optimal partition of objects or, sometimes, of variables, there are other methods, called block clustering methods, which consider simultaneously the two sets and organize the data into homogeneous blocks. Recently, we have proposed a new mixture model called block mixture model which takes into account this situation. This model allows one to embed simultaneous clustering of objects and variables in a mixture approach. We have studied this probabilistic model under the classification likelihood approach and developed a new algorithm for simultaneous partitioning based on the classification EM algorithm. In this paper, we consider the block clustering problem under the maximum likelihood approach and the goal of our contribution is to estimate the parameters of this model. Unfortunately, the application of the EM algorithm for the block mixture model cannot be made directly; difficulties arise due to the dependence structure in the model and approximations are required. Using a variational approximation, we propose a generalized EM algorithm to estimate the parameters of the block mixture model and, to illustrate our approach, we study the case of binary data by using a Bernoulli block mixture.},
  keywords = {Approximation algorithms,Block mixture model,Classification algorithms,Clustering algorithms,Clustering methods,Data mining,EM algorithm,Maximum likelihood estimation,Parameter estimation,Partitioning algorithms,Self organizing feature maps,Sparse matrices,variational approximation},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/6IG45HH2/govaert2005.pdf.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/TL8M3XRF/Govaert et Nadif - 2005 - An EM algorithm for the block mixture model.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/2Y48IB26/1401917.html}
}

@article{govaertLatentBlockModel2010,
  author    = {Govaert, G{\'e}rard and Nadif, Mohamed},
  title     = {Latent {{Block Model}} for {{Contingency Table}}},
  journal   = {Communications in Statistics - Theory and Methods},
  year      = 2010,
  month     = jan,
  volume    = {39},
  number    = {3},
  pages     = {416--425},
  publisher = {Taylor \& Francis},
  issn      = {0361-0926},
  doi       = {10.1080/03610920903140197},
  urldate   = {2023-06-15},
  keywords  = {62H17,62H30,Block clustering,Block Poisson mixture model,CEM algorithm,Contingency table,EM algorithm},
  abstract  = {Although many clustering procedures aim to construct an optimal partition of objects or, sometimes, variables, there are other methods, called block clustering methods, which simultaneously consider the two sets and organize the data into homogeneous blocks. This kind of method has practical importance in a wide variety of applications such as text and market basket data analysis. Typically, the data that arise in these applications are arranged as a two-way contingency table. Using Poisson distributions, a latent block model for these data is proposed and, setting it under the maximum likelihood approach and the classification maximum likelihood approach, various algorithms are provided. Their performances are evaluated and compared to a simple use of EM or CEM applied separately on the rows and columns of the contingency table.},
  file      = {/home/louis/snap/zotero-snap/common/Zotero/storage/PPHP33Z9/Govaert et Nadif - 2010 - Latent Block Model for Contingency Table.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/UT8TARCX/govaert2010.pdf.pdf}
}

@misc{GrossSBMColSBM2025,
  title = {{{GrossSBM}}/{{colSBM}}},
  year = 2025,
  month = jul,
  url = {https://github.com/GrossSBM/colSBM},
  urldate = {2025-09-25},
  abstract = {R package for the joint stochastic blockmodeling of collection of networks},
  howpublished = {GrossSBM},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-09-25T11:59:36.376Z}
}

@article{gusevaDiversityComplexityMicrobial2022a,
  author     = {Guseva, Ksenia and Darcy, Sean and Simon, Eva and Alteio, Lauren V. and {Montesinos-Navarro}, Alicia and Kaiser, Christina},
  title      = {From Diversity to Complexity: {{Microbial}} Networks in Soils},
  shorttitle = {From Diversity to Complexity},
  journal    = {Soil Biology and Biochemistry},
  year       = 2022,
  month      = jun,
  volume     = {169},
  pages      = {108604},
  issn       = {0038-0717},
  doi        = {10.1016/j.soilbio.2022.108604},
  urldate    = {2025-05-06},
  keywords   = {Co-occurrence networks,Ecological networks,Microbial community structure,Microbial network analysis,Soil microbial ecology},
  abstract   = {Network analysis has been used for many years in ecological research to analyze organismal associations, for example in food webs, plant-plant or plant-animal interactions. Although network analysis is widely applied in microbial ecology, only recently has it entered the realms of soil microbial ecology, shown by a rapid rise in studies applying co-occurrence analysis to soil microbial communities. While this application offers great potential for deeper insights into the ecological structure of soil microbial ecosystems, it also brings new challenges related to the specific characteristics of soil datasets and the type of ecological questions that can be addressed. In this Perspectives Paper we assess the challenges of applying network analysis to soil microbial ecology due to the small-scale heterogeneity of the soil environment and the nature of soil microbial datasets. We review the different approaches of network construction that are commonly applied to soil microbial datasets and discuss their features and limitations. Using a test dataset of microbial communities from two depths of a forest soil, we demonstrate how different experimental designs and network constructing algorithms affect the structure of the resulting networks, and how this in turn may influence ecological conclusions. We will also reveal how assumptions of the construction method, methods of preparing the dataset, and definitions of thresholds affect the network structure. Finally, we discuss the particular questions in soil microbial ecology that can be approached by analyzing and interpreting specific network properties. Targeting these network properties in a meaningful way will allow applying this technique not in merely descriptive, but in hypothesis-driven research. Analysing microbial networks in soils opens a window to a better understanding of the complexity of microbial communities. 
However, this approach is unfortunately often used to draw conclusions which are far beyond the scientific evidence it can provide, which has damaged its reputation for soil microbial analysis. In this Perspectives Paper, we would like to sharpen the view for the real potential of microbial co-occurrence analysis in soils, and at the same time raise awareness regarding its limitations and the many ways how it can be misused or misinterpreted.},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-05-07T07:43:04.957Z}
}

@inproceedings{hamiltonInductiveRepresentationLearning,
  title = {Inductive {{Representation Learning}} on {{Large Graphs}}},
  booktitle = {Advances in {{Neural Information Processing Systems}}},
  author = {Hamilton, Will and Ying, Zhitao and Leskovec, Jure},
  year = 2017,
  volume = {30},
  abstract = {Low-dimensional embeddings of nodes in large graphs have proved extremely useful in a variety of prediction tasks, from content recommendation to identifying protein functions. However, most existing approaches require that all nodes in the graph are present during training of the embeddings; these previous approaches are inherently transductive and do not naturally generalize to unseen nodes. Here we present GraphSAGE, a general inductive framework that leverages node feature information (e.g., text attributes) to efficiently generate node embeddings for previously unseen data. Instead of training individual embeddings for each node, we learn a function that generates embeddings by sampling and aggregating features from a node's local neighborhood. Our algorithm outperforms strong baselines on three inductive node-classification benchmarks: we classify the category of unseen nodes in evolving information graphs based on citation and Reddit post data, and we show that our algorithm generalizes to completely unseen graphs using a multi-graph dataset of protein-protein interactions.},
  langid = {english},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/YIUG7VAU/Hamilton et al. - Inductive Representation Learning on Large Graphs.pdf}
}

@misc{hamiltonInductiveRepresentationLearning2018,
  author        = {Hamilton, William L. and Ying, Rex and Leskovec, Jure},
  title         = {Inductive {{Representation Learning}} on {{Large Graphs}}},
  year          = 2018,
  month         = sep,
  number        = {arXiv:1706.02216},
  publisher     = {arXiv},
  eprint        = {1706.02216},
  archiveprefix = {arXiv},
  primaryclass  = {cs.SI},
  doi           = {10.48550/arXiv.1706.02216},
  urldate       = {2026-06-10},
  keywords      = {/unread,Computer Science - Machine Learning,Computer Science - Social and Information Networks,Statistics - Machine Learning},
  abstract      = {Low-dimensional embeddings of nodes in large graphs have proved extremely useful in a variety of prediction tasks, from content recommendation to identifying protein functions. However, most existing approaches require that all nodes in the graph are present during training of the embeddings; these previous approaches are inherently transductive and do not naturally generalize to unseen nodes. Here we present GraphSAGE, a general, inductive framework that leverages node feature information (e.g., text attributes) to efficiently generate node embeddings for previously unseen data. Instead of training individual embeddings for each node, we learn a function that generates embeddings by sampling and aggregating features from a node's local neighborhood. Our algorithm outperforms strong baselines on three inductive node-classification benchmarks: we classify the category of unseen nodes in evolving information graphs based on citation and Reddit post data, and we show that our algorithm generalizes to completely unseen graphs using a multi-graph dataset of protein-protein interactions.},
  annotation    = {Read\_Status: New\\
Read\_Status\_Date: 2026-06-10T15:49:15.404Z},
  file          = {/home/louis/snap/zotero-snap/common/Zotero/storage/NPG9W4DV/Hamilton et al. - 2018 - Inductive Representation Learning on Large Graphs.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/W85VX37U/1706.html}
}

@article{hendersonDerivingInverseSum1981,
  title = {On {{Deriving}} the {{Inverse}} of a {{Sum}} of {{Matrices}}},
  author = {Henderson, H. V. and Searle, S. R.},
  year = 1981,
  journal = {SIAM Review},
  volume = {23},
  number = {1},
  eprint = {2029838},
  eprinttype = {jstor},
  pages = {53--60},
  publisher = {{Society for Industrial and Applied Mathematics}},
  issn = {0036-1445},
  urldate = {2025-10-22},
  abstract = {Available expressions are reviewed and new ones derived for the inverse of the sum of two matrices, one of them being nonsingular. Particular attention is given to $(A + UBV)^{-1}$, where A is nonsingular and U, B and V may be rectangular; generalized inverses of A + UBV are also considered. Several statistical applications are discussed.},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-10-22T12:32:44.903Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/8X2ATCH4/Henderson et Searle - 1981 - On Deriving the Inverse of a Sum of Matrices.pdf}
}

@article{heNetworkMappingRoot2021a,
  title = {Network Mapping of Root--Microbe Interactions in {{Arabidopsis}} thaliana},
  author = {He, Xiaoqing and Zhang, Qi and Li, Beibei and Jin, Yi and Jiang, Libo and Wu, Rongling},
  year = 2021,
  month = sep,
  journal = {npj Biofilms and Microbiomes},
  volume = {7},
  pages = {72},
  issn = {2055-5008},
  doi = {10.1038/s41522-021-00241-4},
  urldate = {2025-05-06},
  abstract = {Understanding how plants interact with their colonizing microbiota to determine plant phenotypes is a fundamental question in modern plant science. Existing approaches for genome-wide association studies (GWAS) are often focused on the association analysis between host genes and the abundance of individual microbes, failing to characterize the genetic bases of microbial interactions that are thought to be important for microbiota structure, organization, and function. Here, we implement a behavioral model to quantify various patterns of microbe-microbe interactions, i.e., mutualism, antagonism, aggression, and altruism, and map host genes that modulate microbial networks constituted by these interaction types. We reanalyze a root-microbiome data involving 179 accessions of Arabidopsis thaliana and find that the four networks differ structurally in the pattern of bacterial-fungal interactions and microbiome complexity. We identify several fungus and bacterial hubs that play a central role in mediating microbial community assembly surrounding A. thaliana root systems. We detect 1142 significant host genetic variants throughout the plant genome and then implement Bayesian networks (BN) to reconstruct epistatic networks involving all significant SNPs, of which 91 are identified as hub QTLs. Results from gene annotation analysis suggest that most of the hub QTLs detected are in proximity to candidate genes, executing a variety of biological functions in plant growth and development, resilience against pathogens, root development, and abiotic stress resistance. This study provides a new gateway to understand how genetic variation in host plants influences microbial communities and our results could help improve crops by harnessing soil microbes.},
  pmcid = {PMC8423736},
  pmid = {34493731},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-05-07T07:43:04.957Z}
}

@article{hoffAdditiveMultiplicativeEffects2021,
  title = {Additive and {{Multiplicative Effects Network Models}}},
  author = {Hoff, Peter},
  year = 2021,
  month = feb,
  journal = {Statistical Science},
  volume = {36},
  number = {1},
  issn = {0883-4237},
  doi = {10.1214/19-STS757},
  urldate = {2026-01-23},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2026-01-23T12:38:27.731Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/MBVWXGEX/Hoff - 2021 - Additive and Multiplicative Effects Network Models.pdf}
}

@article{hoffLatentSpaceApproaches2002,
  title = {Latent {{Space Approaches}} to {{Social Network Analysis}}},
  author = {Hoff, Peter D. and Raftery, Adrian E. and Handcock, Mark S.},
  year = 2002,
  month = dec,
  journal = {Journal of the American Statistical Association},
  volume = {97},
  number = {460},
  pages = {1090--1098},
  publisher = {Taylor \& Francis},
  issn = {0162-1459},
  doi = {10.1198/016214502388618906},
  urldate = {2024-05-20},
  abstract = {Network models are widely used to represent relational information among interacting units. In studies of social networks, recent emphasis has been placed on random graph models where the nodes usually represent individual social actors and the edges represent the presence of a specified relation between actors. We develop a class of models where the probability of a relation between actors depends on the positions of individuals in an unobserved ``social space.'' We make inference for the social space within maximum likelihood and Bayesian frameworks, and propose Markov chain Monte Carlo procedures for making inference on latent positions and the effects of observed covariates. We present analyses of three standard datasets from the social networks literature, and compare the method to an alternative stochastic blockmodeling approach. In addition to improving on model fit for these datasets, our method provides a visual and interpretable model-based spatial representation of social relationships and improves on existing methods by allowing the statistical uncertainty in the social space to be quantified and graphically represented.},
  keywords = {Conditional independence model,Latent position model,Network data,Random graph,Visualization},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/7UYRBBA2/Hoff et al. - 2002 - Latent Space Approaches to Social Network Analysis.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/P3DLLTQ2/Hoff et al. - 2002 - Latent Space Approaches to Social Network Analysis.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/R4TGSVGP/016214502388618906.pdf.pdf}
}

@article{hollandStochasticBlockmodelsFirst1983,
  title = {Stochastic Blockmodels: {{First}} Steps},
  shorttitle = {Stochastic Blockmodels},
  author = {Holland, Paul W. and Laskey, Kathryn Blackmond and Leinhardt, Samuel},
  year = 1983,
  month = jun,
  journal = {Social Networks},
  volume = {5},
  number = {2},
  pages = {109--137},
  issn = {0378-8733},
  doi = {10.1016/0378-8733(83)90021-7},
  urldate = {2023-06-15},
  abstract = {A stochastic model is proposed for social networks in which the actors in a network are partitioned into subgroups called blocks. The model provides a stochastic generalization of the blockmodel. Estimation techniques are developed for the special case of a single relation social network, with blocks specified a priori. An extension of the model allows for tendencies toward reciprocation of ties beyond those explained by the partition. The extended model provides a one degree-of-freedom test of the model. A numerical example from the social network literature is used to illustrate the methods.},
  langid = {english},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/6F8YT8AD/holland1983.pdf.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/7DSZ3KD9/Holland et al. - 1983 - Stochastic blockmodels First steps.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/DUL2RV8Q/holland1983.pdf.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/G9KZBG9W/0378873383900217.html}
}

@article{hronImputationMissingValues2010,
  title = {Imputation of Missing Values for Compositional Data Using Classical and Robust Methods},
  author = {Hron, K. and Templ, M. and Filzmoser, P.},
  year = 2010,
  month = dec,
  journal = {Computational Statistics \& Data Analysis},
  volume = {54},
  number = {12},
  pages = {3095--3107},
  issn = {0167-9473},
  doi = {10.1016/j.csda.2009.11.023},
  urldate = {2026-04-17},
  abstract = {New imputation algorithms for estimating missing values in compositional data are introduced. A first proposal uses the k-nearest neighbor procedure based on the Aitchison distance, a distance measure especially designed for compositional data. It is important to adjust the estimated missing values to the overall size of the compositional parts of the neighbors. As a second proposal an iterative model-based imputation technique is introduced which initially starts from the result of the proposed k-nearest neighbor procedure. The method is based on iterative regressions, thereby accounting for the whole multivariate data information. The regressions have to be performed in a transformed space, and depending on the data quality classical or robust regression techniques can be employed. The proposed methods are tested on a real and on simulated data sets. The results show that the proposed methods outperform standard imputation methods. In the presence of outliers, the model-based method with robust regressions is preferable.},
  copyright = {https://www.elsevier.com/tdm/userlicense/1.0/},
  langid = {english},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2026-04-17T16:14:09.172Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/FR4TF52C/Hron et al. - 2010 - Imputation of missing values for compositional data using classical and robust methods.pdf}
}

@article{hubertComparingPartitions1985,
  title = {Comparing Partitions},
  author = {Hubert, Lawrence and Arabie, Phipps},
  year = 1985,
  month = dec,
  journal = {Journal of Classification},
  volume = {2},
  number = {1},
  pages = {193--218},
  issn = {1432-1343},
  doi = {10.1007/BF01908075},
  urldate = {2023-07-04},
  abstract = {The problem of comparing two different partitions of a finite set of objects reappears continually in the clustering literature. We begin by reviewing a well-known measure of partition correspondence often attributed to Rand (1971), discuss the issue of correcting this index for chance, and note that a recent normalization strategy developed by Morey and Agresti (1984) and adopted by others (e.g., Miligan and Cooper 1985) is based on an incorrect assumption. Then, the general problem of comparing partitions is approached indirectly by assessing the congruence of two proximity matrices using a simple cross-product measure. They are generated from corresponding partitions using various scoring rules. Special cases derivable include traditionally familiar statistics and/or ones tailored to weight certain object pairs differentially. Finally, we propose a measure based on the comparison of object triples having the advantage of a probabilistic interpretation in addition to being corrected for chance (i.e., assuming a constant value under a reasonable null hypothesis) and bounded between \textpm 1.},
  langid = {english},
  keywords = {Consensus indices,Measures of agreement,Measures of association},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/7TKW7HEM/Hubert et Arabie - 1985 - Comparing partitions.pdf}
}

@incollection{Introduction1990,
  title = {Introduction},
  author = {Kaufman, Leonard and Rousseeuw, Peter J.},
  booktitle = {Finding {{Groups}} in {{Data}}},
  year = 1990,
  pages = {1--67},
  publisher = {John Wiley \& Sons, Ltd},
  doi = {10.1002/9780470316801.ch1},
  urldate = {2024-09-13},
  abstract = {The prelims comprise: Motivation; Types of Data and How to Handle Them; Which Clustering Algorithm to Choose; A Schematic Overview of Our Programs; Computing Dissimilarities with the Program DAISY},
  chapter = {1},
  isbn = {978-0-470-31680-1},
  langid = {english},
  keywords = {archeological findings,cluster analysis,interval-scaled variables,social sciences,spherical clusters},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/ZPWRCT6C/1990 - Introduction.pdf}
}

@article{jianRestrictedTweedieStochastic,
  title = {Restricted {{Tweedie}} Stochastic Block Models},
  author = {Jian, Jie and Zhu, Mu and Sang, Peijun},
  journal = {Canadian Journal of Statistics},
  note = {Early View},
  pages = {e70012},
  issn = {1708-945X},
  doi = {10.1002/cjs.70012},
  urldate = {2026-01-07},
  abstract = {The stochastic block model (SBM) is a widely used framework for community detection in networks, where the network structure is typically represented by an adjacency matrix. However, conventional SBMs are not directly applicable to an adjacency matrix that consists of nonnegative zero-inflated continuous edge weights. To model the international trading network, where edge weights represent trading values between countries, we propose an SBM based on a restricted Tweedie distribution. Additionally, we incorporate nodal information, such as the geographical distance between countries, and account for its dynamic effect on edge weights. Notably, we show that given a sufficiently large number of nodes, estimating this covariate effect becomes independent of community labels of each node when computing the maximum likelihood estimator of parameters in our model. This result enables the development of an efficient two-step algorithm that separates the estimation of covariate effects from other parameters. We demonstrate the effectiveness of our proposed method through extensive simulation studies and an application to international trading data.},
  langid = {english},
  keywords = {Community detection,compound Poisson-Gamma distributions,dynamic effects,stochastic block models,variational inference},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2026-01-07T14:24:57.323Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/YWCLL9EY/Jian et al. - Restricted Tweedie stochastic block models.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/EXCVUCSI/cjs.html}
}

@article{jordanoBiodiversityPlantfrugivoreInteractions,
  title = {The Biodiversity of Plant-Frugivore Interactions: Types, Functions, and Consequences},
  author = {Jordano, Pedro},
  abstract = {Pairwise plant-frugivore mutualistic interactions build up into mega-diverse networks involving dozens of interacting species, being the most generalized among free-living species. These mutualisms consist of food provisioning by plants and, their counterpart, plant propagule (seeds) movement by the animals, being crucial for the natural vegetation regeneration in many ecosystems. Yet we are far from understanding which part of this enormous interaction biodiversity is needed for their maintenance. I overview the diversity of interaction modes involved in these mutualisms, the main components of the seed dispersal services, and their functional diversity. I examine how interaction richness covaries with partner species richness at different scales, resulting in variable patterns of species complementarities in terms of seed dispersal effects. The functionality of most generalized plant-frugivore mutualisms relies on complementarity of effects across a high diversity of partners, yet frequently depends on just a distinct subset of them, resulting in high functional redundancy. Two distinct aspects are relevant: 1) variable quantitative effects among species; 2) variable pairwise-interaction outcomes, between the extremes of antagonism and mutualism. Frugivory, occurring at the final stage of each plant reproductive episode, entails a large, cumulative, effect of other biotic interactions occurring at earlier stages (e.g., floral herbivory, pollination, pre-dispersal fruit damage). I examine how plant-frugivore interactions mix up with the whole biotic interactome of a plant, using the Prunus mahaleb system as a case study. The effects of distinct subsets of frugivores combine with different sets of antagonistic and mutualistic partners in other interactions, yet having a lasting signal on final seed dispersal success.},
  langid = {english},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-12-01T08:49:25.634Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/BL3DAD7U/Jordano - The biodiversity of plant-frugivore interactions types, functions, and consequences.pdf}
}

@article{karrerStochasticBlockmodelsCommunity2011,
  title = {Stochastic Blockmodels and Community Structure in Networks},
  author = {Karrer, Brian and Newman, M. E. J.},
  year = 2011,
  month = jan,
  journal = {Physical Review E},
  volume = {83},
  number = {1},
  eprint = {1008.3926},
  primaryclass = {physics},
  pages = {016107},
  issn = {1539-3755, 1550-2376},
  doi = {10.1103/PhysRevE.83.016107},
  urldate = {2025-09-26},
  abstract = {Stochastic blockmodels have been proposed as a tool for detecting community structure in networks as well as for generating synthetic networks for use as benchmarks. Most blockmodels, however, ignore variation in vertex degree, making them unsuitable for applications to real-world networks, which typically display broad degree distributions that can significantly distort the results. Here we demonstrate how the generalization of blockmodels to incorporate this missing element leads to an improved objective function for community detection in complex networks. We also propose a heuristic algorithm for community detection using this objective function or its non-degree-corrected counterpart and show that the degree-corrected version dramatically outperforms the uncorrected one in both real-world and synthetic networks.},
  archiveprefix = {arXiv},
  langid = {english},
  keywords = {Computer Science - Social and Information Networks,Condensed Matter - Statistical Mechanics,Physics - Data Analysis Statistics and Probability,Physics - Physics and Society},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-09-26T08:18:22.155Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/HDPN46QR/Karrer et Newman - 2011 - Stochastic blockmodels and community structure in networks.pdf}
}

@article{kaszewska-gilasGlobalStudiesHostParasite2021,
  title = {Global {{Studies}} of the {{Host-Parasite Relationships}} between {{Ectoparasitic Mites}} of the {{Family Syringophilidae}} and {{Birds}} of the {{Order Columbiformes}}},
  author = {{Kaszewska-Gilas}, Katarzyna and Kosicki, Jakub Ziemowit and Hromada, Martin and Skoracki, Maciej},
  year = 2021,
  month = dec,
  journal = {Animals},
  volume = {11},
  number = {12},
  pages = {3392},
  publisher = {Multidisciplinary Digital Publishing Institute},
  issn = {2076-2615},
  doi = {10.3390/ani11123392},
  urldate = {2023-06-15},
  abstract = {The quill mites belonging to the family Syringophilidae (Acari: Prostigmata: Cheyletoidea) are obligate ectoparasites of birds. They inhabit different types of the quills, where they spend their whole life cycle. In this paper, we conducted a global study of syringophilid mites associated with columbiform birds. We examined 772 pigeon and dove individuals belonging to 112 species (35\% world fauna) from all zoogeographical regions (except Madagascan) where Columbiformes occur. We measured the prevalence (IP) and the confidence interval (CI) for all infested host species. IP ranges between 4.2 and 66.7 (CI 0.2--100). We applied a bipartite analysis to determine host--parasite interaction, network indices, and host specificity on species and whole network levels. The Syringophilidae--Columbiformes network was composed of 25 mite species and 65 host species. The bipartite network was characterized by a high network level specialization H2{$\prime$} = 0.93, high nestedness N = 0.908, connectance C = 0.90, and high modularity Q = 0.83, with 20 modules. Moreover, we reconstructed the phylogeny of the quill mites associated with columbiform birds on the generic level. Analysis shows two distinct clades: Meitingsunes + Psittaciphilus, and Peristerophila + Terratosyringophilus.},
  copyright = {http://creativecommons.org/licenses/by/3.0/},
  langid = {english},
  keywords = {Acari,biodiversity,bipartite-example,network,pigeons and doves,quill mites},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/UDZJF69Q/Kumpulainen et al. - 2024 - From your Block to our Block How to Find Shared Structure between Stochastic Block Models over Mult.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/VXVQ5CPH/Kaszewska-Gilas et al. - 2021 - Global Studies of the Host-Parasite Relationships .pdf}
}

@book{kaufmanFindingGroupsData1990,
  title = {Finding {{Groups}} in {{Data}}: {{An Introduction}} to {{Cluster Analysis}}},
  shorttitle = {Finding {{Groups}} in {{Data}}},
  author = {Kaufman, Leonard and Rousseeuw, Peter J.},
  year = 1990,
  month = mar,
  series = {Wiley {{Series}} in {{Probability}} and {{Statistics}}},
  edition = {1},
  publisher = {Wiley},
  doi = {10.1002/9780470316801},
  urldate = {2024-09-13},
  copyright = {http://doi.wiley.com/10.1002/tdm\_license\_1.1},
  isbn = {978-0-471-87876-6 978-0-470-31680-1},
  langid = {english},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/HTL6RWZ7/Kaufman et Rousseeuw - 1990 - Finding Groups in Data An Introduction to Cluster Analysis.pdf}
}

@misc{kaurLatentPositionNetwork2023,
  title = {Latent {{Position Network Models}}},
  author = {Kaur, Hardeep and Rastelli, Riccardo and Friel, Nial and Raftery, Adrian E.},
  year = 2023,
  month = apr,
  number = {arXiv:2304.02979},
  eprint = {2304.02979},
  primaryclass = {stat},
  publisher = {arXiv},
  doi = {10.48550/arXiv.2304.02979},
  urldate = {2026-01-23},
  abstract = {In this chapter, we present a review of latent position models for networks. We review the recent literature in this area and illustrate the basic aspects and properties of this modeling framework. Through several illustrative examples we highlight how the latent position model is able to capture important features of observed networks. We emphasize how the canonical design of this model has made it popular thanks to its ability to provide interpretable visualizations of complex network interactions. We outline the main extensions that have been introduced to this model, illustrating its flexibility and applicability.},
  archiveprefix = {arXiv},
  keywords = {Statistics - Methodology},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2026-01-23T09:21:03.948Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/T8GLNRP4/Kaur et al. - 2023 - Latent Position Network Models.pdf}
}

@article{keribinEstimationSelectionLatent2015,
  title = {Estimation and Selection for the Latent Block Model on Categorical Data},
  author = {Keribin, Christine and Brault, Vincent and Celeux, Gilles and Govaert, G{\'e}rard},
  year = 2015,
  month = nov,
  journal = {Statistics and Computing},
  volume = {25},
  number = {6},
  pages = {1201--1216},
  issn = {1573-1375},
  doi = {10.1007/s11222-014-9472-2},
  urldate = {2024-05-15},
  abstract = {This paper deals with estimation and model selection in the Latent Block Model (LBM) for categorical data. First, after providing sufficient conditions ensuring the identifiability of this model, we generalise estimation procedures and model selection criteria derived for binary data. Secondly, we develop Bayesian inference through Gibbs sampling and with a well calibrated non informative prior distribution, in order to get the MAP estimator: this is proved to avoid the traps encountered by the LBM with the maximum likelihood methodology. Then model selection criteria are presented. In particular an exact expression of the integrated completed likelihood criterion requiring no asymptotic approximation is derived. Finally numerical experiments on both simulated and real data sets highlight the appeal of the proposed estimation and model selection procedures.},
  langid = {english},
  keywords = {Bayesian inference,BIC criterion,EM algorithm,Gibbs sampling,Integrated completed likelihood,Stochastic EM,Variational approximation},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/49IKUHMA/s11222-014-9472-2.pdf.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/VXKAK359/Keribin et al. - 2015 - Estimation and selection for the latent block mode.pdf}
}

@article{kernighanEfficientHeuristicProcedure1970,
  title = {An Efficient Heuristic Procedure for Partitioning Graphs},
  author = {Kernighan, B. W. and Lin, S.},
  year = 1970,
  month = feb,
  journal = {The Bell System Technical Journal},
  volume = {49},
  number = {2},
  pages = {291--307},
  issn = {0005-8580},
  doi = {10.1002/j.1538-7305.1970.tb01770.x},
  urldate = {2025-01-26},
  abstract = {We consider the problem of partitioning the nodes of a graph with costs on its edges into subsets of given sizes so as to minimize the sum of the costs on all edges cut. This problem arises in several physical situations --- for example, in assigning the components of electronic circuits to circuit boards to minimize the number of connections between boards. This paper presents a heuristic method for partitioning arbitrary graphs which is both effective in finding optimal partitions, and fast enough to be practical in solving large problems.},
  keywords = {a lire},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/W2RM4C9T/6771089.html}
}

@misc{kingmaAutoEncodingVariationalBayes2022,
  title = {Auto-{{Encoding Variational Bayes}}},
  author = {Kingma, Diederik P. and Welling, Max},
  year = 2022,
  month = dec,
  number = {arXiv:1312.6114},
  eprint = {1312.6114},
  primaryclass = {cs, stat},
  publisher = {arXiv},
  doi = {10.48550/arXiv.1312.6114},
  urldate = {2024-02-19},
  abstract = {How can we perform efficient inference and learning in directed probabilistic models, in the presence of continuous latent variables with intractable posterior distributions, and large datasets? We introduce a stochastic variational inference and learning algorithm that scales to large datasets and, under some mild differentiability conditions, even works in the intractable case. Our contributions are two-fold. First, we show that a reparameterization of the variational lower bound yields a lower bound estimator that can be straightforwardly optimized using standard stochastic gradient methods. Second, we show that for i.i.d. datasets with continuous latent variables per datapoint, posterior inference can be made especially efficient by fitting an approximate inference model (also called a recognition model) to the intractable posterior using the proposed lower bound estimator. Theoretical advantages are reflected in experimental results.},
  archiveprefix = {arXiv},
  keywords = {Computer Science - Machine Learning,Statistics - Machine Learning},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/5YRDUYYW/Kingma et Welling - 2022 - Auto-Encoding Variational Bayes.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/ECSHPY3J/Kingma et Welling - 2022 - Auto-Encoding Variational Bayes.pdf;/home/polarolouis/Nextcloud/Documents/ZotFile/Kingma_Welling_2022_Auto-Encoding Variational Bayes.pdf;/home/polarolouis/Nextcloud/Documents/ZotFile/Kingma_Welling_2022_Auto-Encoding Variational Bayes2.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/9PQS9M4I/1312.html;/home/louis/snap/zotero-snap/common/Zotero/storage/JQSWP8WK/1312.html;/home/louis/snap/zotero-snap/common/Zotero/storage/MK5TFH4E/1312.html}
}

@misc{kipfSemiSupervisedClassificationGraph2017,
  title = {Semi-{{Supervised Classification}} with {{Graph Convolutional Networks}}},
  author = {Kipf, Thomas N. and Welling, Max},
  year = 2017,
  month = feb,
  number = {arXiv:1609.02907},
  eprint = {1609.02907},
  primaryclass = {cs, stat},
  publisher = {arXiv},
  doi = {10.48550/arXiv.1609.02907},
  urldate = {2024-05-14},
  abstract = {We present a scalable approach for semi-supervised learning on graph-structured data that is based on an efficient variant of convolutional neural networks which operate directly on graphs. We motivate the choice of our convolutional architecture via a localized first-order approximation of spectral graph convolutions. Our model scales linearly in the number of graph edges and learns hidden layer representations that encode both local graph structure and features of nodes. In a number of experiments on citation networks and on a knowledge graph dataset we demonstrate that our approach outperforms related methods by a significant margin.},
  archiveprefix = {arXiv},
  keywords = {Computer Science - Machine Learning,Statistics - Machine Learning},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/SWWT37XC/Kipf et Welling - 2017 - Semi-Supervised Classification with Graph Convolut.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/6XSQ5U3D/1609.html}
}

@misc{kipfVariationalGraphAutoEncoders2016,
  title = {Variational {{Graph Auto-Encoders}}},
  author = {Kipf, Thomas N. and Welling, Max},
  year = 2016,
  month = nov,
  number = {arXiv:1611.07308},
  eprint = {1611.07308},
  primaryclass = {cs, stat},
  publisher = {arXiv},
  doi = {10.48550/arXiv.1611.07308},
  urldate = {2024-05-14},
  abstract = {We introduce the variational graph auto-encoder (VGAE), a framework for unsupervised learning on graph-structured data based on the variational auto-encoder (VAE). This model makes use of latent variables and is capable of learning interpretable latent representations for undirected graphs. We demonstrate this model using a graph convolutional network (GCN) encoder and a simple inner product decoder. Our model achieves competitive results on a link prediction task in citation networks. In contrast to most existing models for unsupervised learning on graph-structured data and link prediction, our model can naturally incorporate node features, which significantly improves predictive performance on a number of benchmark datasets.},
  archiveprefix = {arXiv},
  keywords = {Computer Science - Machine Learning,Statistics - Machine Learning},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/MSK48ZUE/Kipf et Welling - 2016 - Variational Graph Auto-Encoders.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/3VJBSGI3/1611.html;/home/louis/snap/zotero-snap/common/Zotero/storage/BBTHQNRZ/1611.html}
}

@book{kolaczykStatisticalAnalysisNetwork2009,
  title = {Statistical {{Analysis}} of {{Network Data}}: {{Methods}} and {{Models}}},
  shorttitle = {Statistical {{Analysis}} of {{Network Data}}},
  author = {Kolaczyk, Eric D.},
  year = 2009,
  series = {Springer {{Series}} in {{Statistics}}},
  publisher = {Springer New York},
  address = {New York, NY},
  doi = {10.1007/978-0-387-88146-1},
  urldate = {2025-05-26},
  copyright = {https://www.springernature.com/gp/researchers/text-and-data-mining},
  isbn = {978-0-387-88145-4 978-0-387-88146-1},
  langid = {english},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-05-26T11:42:27.939Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/RQPMHFGB/Kolaczyk - 2009 - Statistical Analysis of Network Data Methods and Models.pdf}
}

@misc{korotinNeuralOptimalTransport2023,
  title = {Neural {{Optimal Transport}}},
  author = {Korotin, Alexander and Selikhanovych, Daniil and Burnaev, Evgeny},
  year = 2023,
  month = mar,
  number = {arXiv:2201.12220},
  eprint = {2201.12220},
  primaryclass = {cs},
  publisher = {arXiv},
  doi = {10.48550/arXiv.2201.12220},
  urldate = {2025-06-11},
  abstract = {We present a novel neural-networks-based algorithm to compute optimal transport maps and plans for strong and weak transport costs. To justify the usage of neural networks, we prove that they are universal approximators of transport plans between probability distributions. We evaluate the performance of our optimal transport algorithm on toy examples and on the unpaired image-to-image translation.},
  archiveprefix = {arXiv},
  keywords = {Computer Science - Machine Learning},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-06-11T15:47:07.215Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/BFRNCSI7/Korotin et al. - 2023 - Neural Optimal Transport.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/TMSTJG86/2201.html}
}

% Survey article; the journal is stored under its full name so that any
% abbreviation is left to the bibliography style.
@article{kriegeSurveyGraphKernels2020,
  title = {A Survey on Graph Kernels},
  author = {Kriege, Nils M. and Johansson, Fredrik D. and Morris, Christopher},
  year = 2020,
  month = jan,
  journal = {Applied Network Science},
  volume = {5},
  number = {1},
  pages = {6},
  issn = {2364-8228},
  doi = {10.1007/s41109-019-0195-3},
  urldate = {2025-01-26},
  abstract = {Graph kernels have become an established and widely-used technique for solving classification tasks on graphs. This survey gives a comprehensive overview of techniques for kernel-based graph classification developed in the past 15 years. We describe and categorize graph kernels based on properties inherent to their design, such as the nature of their extracted graph features, their method of computation and their applicability to problems in practice. In an extensive experimental evaluation, we study the classification accuracy of a large suite of graph kernels on established benchmarks as well as new datasets. We compare the performance of popular kernels with several baseline methods and study the effect of applying a Gaussian RBF kernel to the metric induced by a graph kernel. In doing so, we find that simple baselines become competitive after this transformation on some datasets. Moreover, we study the extent to which existing graph kernels agree in their predictions (and prediction errors) and obtain a data-driven categorization of kernels as result. Finally, based on our experimental results, we derive a practitioner's guide to kernel-based graph classification.},
  langid = {english},
  keywords = {a lire,Graph kernels,Machine learning,Supervised graph classification},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/P5RPFA98/Kriege et al. - 2020 - A survey on graph kernels.pdf}
}

% arXiv preprint; primaryclass carries the full arXiv category (archive.subject),
% matching the single category listed in keywords.
@misc{kumpulainenYourBlockOur2024,
  title = {From Your {{Block}} to Our {{Block}}: {{How}} to {{Find Shared Structure}} between {{Stochastic Block Models}} over {{Multiple Graphs}}},
  shorttitle = {From Your {{Block}} to Our {{Block}}},
  author = {Kumpulainen, Iiro and Dalleiger, Sebastian and Vreeken, Jilles and Tatti, Nikolaj},
  year = 2024,
  month = dec,
  number = {arXiv:2412.15476},
  eprint = {2412.15476},
  primaryclass = {cs.SI},
  publisher = {arXiv},
  doi = {10.48550/arXiv.2412.15476},
  urldate = {2025-01-09},
  abstract = {Stochastic Block Models (SBMs) are a popular approach to modeling single real-world graphs. The key idea of SBMs is to partition the vertices of the graph into blocks with similar edge densities within, as well as between different blocks. However, what if we are given not one but multiple graphs that are unaligned and of different sizes? How can we find out if these graphs share blocks with similar connectivity structures? In this paper, we propose the shared stochastic block modeling (SSBM) problem, in which we model n graphs using SBMs that share parameters of s blocks. We show that fitting an SSBM is NP-hard, and consider two approaches to fit good models in practice. In the first, we directly maximize the likelihood of the shared model using a Markov chain Monte Carlo algorithm. In the second, we first fit an SBM for each graph and then select which blocks to share. We propose an integer linear program to find the optimal shared blocks and to scale to large numbers of blocks, we propose a fast greedy algorithm. Through extensive empirical evaluation on synthetic and real-world data, we show that our methods work well in practice.},
  archiveprefix = {arXiv},
  langid = {english},
  keywords = {Computer Science - Social and Information Networks},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/R9S2BRF7/Kumpulainen et al. - 2024 - From your Block to our Block How to Find Shared Structure between Stochastic Block Models over Mult.pdf}
}

% Conference paper in a Springer proceedings volume (pp. 380--389).
% NOTE(review): no series/volume is recorded for the proceedings (presumably a
% Springer lecture-notes volume) and no month is given; confirm and complete.
@inproceedings{kunegisLinkPredictionProblem2010,
  title = {The {{Link Prediction Problem}} in {{Bipartite Networks}}},
  booktitle = {Computational {{Intelligence}} for {{Knowledge-Based Systems Design}}},
  author = {Kunegis, J{\'e}r{\^o}me and De Luca, Ernesto W. and Albayrak, Sahin},
  editor = {H{\"u}llermeier, Eyke and Kruse, Rudolf and Hoffmann, Frank},
  year = 2010,
  pages = {380--389},
  publisher = {Springer},
  address = {Berlin, Heidelberg},
  doi = {10.1007/978-3-642-14049-5_39},
  abstract = {We define and study the link prediction problem in bipartite networks, specializing general link prediction algorithms to the bipartite case. In a graph, a link prediction function of two vertices denotes the similarity or proximity of the vertices. Common link prediction functions for general graphs are defined using paths of length two between two nodes. Since in a bipartite graph adjacency vertices can only be connected by paths of odd lengths, these functions do not apply to bipartite graphs. Instead, a certain class of graph kernels (spectral transformation kernels) can be generalized to bipartite graphs when the positive-semidefinite kernel constraint is relaxed. This generalization is realized by the odd component of the underlying spectral transformation. This construction leads to several new link prediction pseudokernels such as the matrix hyperbolic sine, which we examine for rating graphs, authorship graphs, folksonomies, document--feature networks and other types of bipartite networks.},
  isbn = {978-3-642-14049-5},
  langid = {english},
  keywords = {Bipartite Graph,Bipartite Network,Link Prediction,Mean Average Precision,Preferential Attachment},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/9JHHFCDM/Kunegis et al. - 2010 - The Link Prediction Problem in Bipartite Networks.pdf}
}

% arXiv preprint introducing colBiSBM for collections of bipartite networks.
% NOTE(review): primaryclass holds only the archive (stat), not a full
% archive.subject class; keywords list two stat categories (Applications,
% Machine Learning) -- confirm the primary one on arXiv and complete the field.
@misc{lacosteCommonStructureDiscovery2025,
  title = {Common {{Structure Discovery}} in {{Collections}} of {{Bipartite Networks}}: {{Application}} to {{Pollination Systems}}},
  shorttitle = {Common {{Structure Discovery}} in {{Collections}} of {{Bipartite Networks}}},
  author = {Lacoste, Louis and Barbillon, Pierre and Donnet, Sophie},
  year = 2025,
  month = dec,
  number = {arXiv:2512.01716},
  eprint = {2512.01716},
  primaryclass = {stat},
  publisher = {arXiv},
  doi = {10.48550/arXiv.2512.01716},
  urldate = {2026-03-17},
  abstract = {Bipartite networks are widely used to encode the ecological interactions. Being able to compare the organization of bipartite networks is a first step toward a better understanding of how environmental factors shape community structure and resilience. Yet current methods for structure detection in bipartite networks overlook shared patterns across collections of networks. We introduce the colBiSBM, a family of probabilistic models for collections of bipartite networks that extends the classical Latent Block Model (LBM). The proposed framework assumes that networks are independent realizations of a shared mesoscale structure, encoded through common inter-block connectivity parameters. We establish identifiability conditions for the different variants of colBiSBM and develop a variational EM algorithm for parameter estimation, coupled with an adaptation of the integrated classification likelihood (ICL) criterion for model selection. We demonstrate how our approach can be used to classify networks based on their topology or organization. Simulation studies highlight the ability of colBiSBM to recover common structures, improve clustering performance, and enhance link prediction by borrowing strength across networks. An application to plant--pollinator networks highlights how the method uncovers shared ecological roles and partitions networks into sub-collections with similar connectivity patterns. These results illustrate the methodological and practical advantages of joint modeling over separate network analyses in the study of bipartite systems.},
  archiveprefix = {arXiv},
  langid = {english},
  keywords = {Computer Science - Machine Learning,Statistics - Applications,Statistics - Machine Learning},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2026-03-17T08:38:41.405Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/73QMNIZZ/Lacoste et al. - 2025 - Common Structure Discovery in Collections of Bipartite Networks Application to Pollination Systems.pdf}
}

% Online dictionary page. The author is a corporate name, so it is wrapped in an
% extra brace pair to keep BibTeX from splitting it into given/family parts.
@misc{larousseDefinitionsBipartiBipartite,
  title = {{D\'efinitions : biparti, bipartite - Dictionnaire de fran\c cais Larousse}},
  shorttitle = {{D\'efinitions}},
  author = {{{\'E}ditions Larousse}},
  urldate = {2023-06-17},
  abstract = {biparti, bipartite - D\'efinitions Fran\c cais : Retrouvez la d\'efinition de biparti, bipartite, ainsi que les difficult\'es... - synonymes, homonymes, difficult\'es, citations.},
  howpublished = {https://www.larousse.fr/dictionnaires/francais/biparti/9503},
  langid = {french},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/MA2VH6NX/9503.html}
}

% Journal article proposing the ILvb model-selection criterion for the SBM.
% Given names are stored in full; abbreviation is left to the bibliography style.
@article{latoucheVariationalBayesianInference2012,
  title = {Variational {{Bayesian}} Inference and Complexity Control for Stochastic Block Models},
  author = {Latouche, Pierre and Birmel{\'e}, Etienne and Ambroise, Christophe},
  year = 2012,
  month = feb,
  journal = {Statistical Modelling},
  volume = {12},
  number = {1},
  pages = {93--115},
  publisher = {SAGE Publications India},
  issn = {1471-082X},
  doi = {10.1177/1471082X1001200105},
  urldate = {2025-01-26},
  abstract = {It is now widely accepted that knowledge can be acquired from networks by clustering their vertices according to the connection profiles. Many methods have been proposed and in this paper we concentrate on the Stochastic Block Model (SBM). The clustering of vertices and the estimation of SBM model parameters have been subject to previous work, and numerous inference strategies such as variational expectation maximization (EM) and classification EM have been proposed. However, SBM still suffers from a lack of criteria to estimate the number of components in the mixture. To our knowledge, only one model-based criterion, Integrated Complete-data Likelihood (ICL), has been derived for SBM in the literature. It relies on an asymptotic approximation of the integrated complete-data likelihood and recent studies have shown that it tends to be too conservative in the case of small networks. To tackle this issue, we propose a new criterion that we call Integrated Likelihood Variational Bayes (ILvb), based on a non-asymptotic approximation of the marginal likelihood. We describe how the criterion can be computed through a variational Bayes EM algorithm.},
  langid = {english},
  keywords = {a lire},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/AWA4CBIX/Latouche et al. - 2012 - Variational Bayesian inference and complexity control for stochastic block models.pdf}
}

% R package (software) entry. The quoted term in the title uses LaTeX
% opening/closing quotes, and howpublished gives the locator that urldate refers to.
@misc{legerBlockmodelsLatentStochastic2021,
  title = {Blockmodels: {{Latent}} and {{Stochastic Block Model Estimation}} by a `{{V-EM}}' {{Algorithm}}},
  shorttitle = {Blockmodels},
  author = {Leger, Jean-Benoist and Barbillon, Pierre and Chiquet, Julien},
  year = 2021,
  month = dec,
  urldate = {2024-11-04},
  abstract = {Latent and Stochastic Block Model estimation by a Variational EM algorithm. Various probability distribution are provided (Bernoulli, Poisson...), with or without covariates.},
  howpublished = {https://CRAN.R-project.org/package=blockmodels},
  copyright = {LGPL-2.1}
}

% Ecology article on how sampling completeness and taxonomic resolution affect
% bipartite network descriptors. Compound surnames are brace-protected.
@article{llopis-belenguerSensitivityBipartiteNetwork2023,
  title = {Sensitivity of Bipartite Network Analyses to Incomplete Sampling and Taxonomic Uncertainty},
  author = {{Llopis-Belenguer}, Cristina and Balbuena, Juan Antonio and {Blasco-Costa}, Isabel and Karvonen, Anssi and Sarabeev, Volodimir and Jokela, Jukka},
  year = 2023,
  month = apr,
  journal = {Ecology},
  volume = {104},
  number = {4},
  pages = {e3974},
  publisher = {John Wiley \& Sons, Ltd},
  issn = {0012-9658},
  doi = {10.1002/ecy.3974},
  urldate = {2025-09-18},
  abstract = {Bipartite network analysis is a powerful tool to study the processes structuring interactions in ecological communities. In applying the method, it is assumed that the sampled interactions provide an accurate representation of the actual community. However, acquiring a representative sample may be difficult as not all species are equally abundant or easily identifiable. Two potential sampling issues can compromise the conclusions of bipartite network analyses: failure to capture the full range of interactions (sampling completeness) and use of a taxonomic level higher than species to evaluate the network (taxonomic resolution). We asked how commonly used descriptors of bipartite antagonistic communities (modularity, nestedness, connectance, and specialization [$H_2'$]) are affected by reduced host sampling completeness, parasite taxonomic resolution, and their crossed effect, as they are likely to co-occur. We used a quantitative niche model to generate weighted bipartite networks that resembled natural host--parasite communities. The descriptors were more sensitive to uncertainty in parasite taxonomic resolution than to host sampling completeness. When only 10\% of parasite taxonomic resolution was retained, modularity and specialization decreased by \textasciitilde 76\% and \textasciitilde 12\%, respectively, and nestedness and connectance increased by \textasciitilde 114\% and \textasciitilde 345\% respectively. The loss of taxonomic resolution led to a wide range of possible communities, which made it difficult to predict its effects on a given network. With regards to host sampling completeness, standardized nestedness, connectance, and specialization were robust, whereas modularity was sensitive (\textasciitilde 30\% decrease). The combination of both sampling issues had an additive effect on modularity. 
In communities with low effort for both sampling issues (50\%--10\% of sampling completeness and taxonomic resolution), estimators of modularity, and nestedness could not be distinguished from those of random assemblages. Thus, the categorical description of communities with low sampling effort (e.g., if a community is modular or not) should be done with caution. We recommend evaluating both sampling completeness and taxonomic certainty when conducting bipartite network analyses. Care should also be exercised when using nonrobust descriptors (the four descriptors for parasite taxonomic resolution; modularity for host sampling completeness) when sampling issues are likely to affect a dataset.},
  keywords = {bipartite networks,host-parasite interactions,sampling completeness,sampling issues,taxonomic resolution},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-09-18T14:55:12.700Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/XC8KSS5S/Llopis-Belenguer et al. - 2023 - Sensitivity of bipartite network analyses to incomplete sampling and taxonomic uncertainty.pdf}
}

% Journal article introducing weighted UniFrac; the Greek letter in the title is
% written as brace-protected math so that styles cannot recase it.
@article{lozuponeQuantitativeQualitativeDiversity2007,
  title = {Quantitative and {{Qualitative}} {$\beta$} {{Diversity Measures Lead}} to {{Different Insights}} into {{Factors That Structure Microbial Communities}}},
  author = {Lozupone, Catherine A. and Hamady, Micah and Kelley, Scott T. and Knight, Rob},
  year = 2007,
  month = mar,
  journal = {Applied and Environmental Microbiology},
  volume = {73},
  number = {5},
  pages = {1576--1585},
  publisher = {American Society for Microbiology},
  issn = {0099-2240},
  doi = {10.1128/AEM.01996-06},
  urldate = {2025-11-07},
  abstract = {The assessment of microbial diversity and distribution is a major concern in environmental microbiology. There are two general approaches for measuring community diversity: quantitative measures, which use the abundance of each taxon, and qualitative measures, which use only the presence/absence of data. Quantitative measures are ideally suited to revealing community differences that are due to changes in relative taxon abundance (e.g., when a particular set of taxa flourish because a limiting nutrient source becomes abundant). Qualitative measures are most informative when communities differ primarily by what can live in them (e.g., at high temperatures), in part because abundance information can obscure significant patterns of variation in which taxa are present. We illustrate these principles using two 16S rRNA-based surveys of microbial populations and two phylogenetic measures of community {$\beta$} diversity: unweighted UniFrac, a qualitative measure, and weighted UniFrac, a new quantitative measure, which we have added to the UniFrac website (http://bmf.colorado.edu/unifrac). These studies considered the relative influences of mineral chemistry, temperature, and geography on microbial community composition in acidic thermal springs in Yellowstone National Park and the influences of obesity and kinship on microbial community composition in the mouse gut. We show that applying qualitative and quantitative measures to the same data set can lead to dramatically different conclusions about the main factors that structure microbial diversity and can provide insight into the nature of community differences. We also demonstrate that both weighted and unweighted UniFrac measurements are robust to the methods used to build the underlying phylogeny.},
  langid = {english},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-11-07T14:55:50.153Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/VJPPM6P5/Lozupone et al. - 2007 - Quantitative and Qualitative β Diversity Measures Lead to Different Insights into Factors That Struc.pdf}
}

% Journal article, Environmental Microbiology Reports 7(3), pp. 554--569.
% The compound surname Rogel-Gaillard is brace-protected in the author list.
% NOTE(review): no month field, unlike most sibling article entries; confirm
% whether the issue month should be recorded.
@article{machEarlylifeEstablishmentSwine2015,
  title = {Early-Life Establishment of the Swine Gut Microbiome and Impact on Host Phenotypes},
  author = {Mach, N{\'u}ria and Berri, Mustapha and Estell{\'e}, Jordi and Levenez, Florence and Lemonnier, Ga{\"e}tan and Denis, Catherine and Leplat, Jean-Jacques and Chevaleyre, Claire and Billon, Yvon and Dor{\'e}, Jo{\"e}l and {Rogel-Gaillard}, Claire and Lepage, Patricia},
  year = 2015,
  journal = {Environmental Microbiology Reports},
  volume = {7},
  number = {3},
  pages = {554--569},
  issn = {1758-2229},
  doi = {10.1111/1758-2229.12285},
  urldate = {2026-06-08},
  abstract = {Early bacterial colonization and succession within the gastrointestinal tract has been suggested to be crucial in the establishment of specific microbiota composition and the shaping of host phenotype. Here, the composition and dynamics of faecal microbiomes were studied for 31 healthy piglets across five age strata (days 14, 36, 48, 60 and 70 after birth) together with their mothers. Faecal microbiome composition was assessed by 16S rRNA gene 454-pyrosequencing. Bacteroidetes and Firmicutes were the predominant phyla present at each age. For all piglets, luminal secretory IgA concentration was measured at day 70, and body weight was recorded until day 70. The microbiota of suckling piglets was mainly represented by Bacteroides, Oscillibacter, Escherichia/Shigella, Lactobacillus and unclassified Ruminococcaceae genera. This pattern contrasted with that of Acetivibrio, Dialister, Oribacterium, Succinivibrio and Prevotella genera, which appeared increased after weaning. Lactobacillus fermentum might be vertically transferred via breast milk or faeces. The microbiota composition coevolved with their hosts towards two different clusters after weaning, primarily distinguished by unclassified Ruminococcaceae and Prevotella abundances. Prevotella was positively correlated with luminal secretory IgA concentrations, and body weight. Our study opens up new possibilities for health and feed efficiency manipulation via genetic selection and nutrition in the agricultural domain.},
  copyright = {\copyright{} 2015 Society for Applied Microbiology and John Wiley \& Sons Ltd},
  langid = {english},
  keywords = {/unread},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2026-06-08T14:16:37.959Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/DTILIFV5/Mach et al. - 2015 - Early-life establishment of the swine gut microbiome and impact on host phenotypes.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/SPKZZTZR/1758-2229.html}
}

% Zenodo software deposit (R scripts), not a journal article: typed as misc so
% that no journal field is required; howpublished names the repository.
@misc{maeldoreMaelDorePollination_networksScripts2020,
  title = {{{MaelDore}}/{{Pollination}}\_networks: {{R}} Scripts for {{Dor\'e}} et al., 2020 - {{Relative}} Effects of Anthropogenic Pressures, Climate, and Sampling Design on the Structure of Pollination Networks at the Global Scale},
  shorttitle = {{{MaelDore}}/{{Pollination}}\_networks},
  author = {Dor{\'e}, Ma{\"e}l},
  year = 2020,
  month = nov,
  howpublished = {Zenodo},
  publisher = {Zenodo},
  doi = {10.5281/zenodo.4290503},
  urldate = {2023-06-21},
  abstract = {R scripts for Dor\'e et al., 2020 - Relative effects of anthropogenic pressures, climate, and sampling design on the structure of pollination networks at the global scale},
  copyright = {Open Access},
  keywords = {data,plant-pollinator}
}

% KDD 2018 conference paper. The address field gives the publisher's location
% (ACM, New York), not the city where the conference was held.
@inproceedings{maHierarchicalTaxonomyAware2018,
  title = {Hierarchical {{Taxonomy Aware Network Embedding}}},
  booktitle = {Proceedings of the 24th {{ACM SIGKDD International Conference}} on {{Knowledge Discovery}} \& {{Data Mining}}},
  author = {Ma, Jianxin and Cui, Peng and Wang, Xiao and Zhu, Wenwu},
  year = 2018,
  month = jul,
  pages = {1920--1929},
  publisher = {ACM},
  address = {New York, NY, USA},
  doi = {10.1145/3219819.3220062},
  urldate = {2025-09-24},
  isbn = {978-1-4503-5552-0},
  langid = {english},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-09-24T08:22:06.308Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/RSCR8PVS/Ma et al. - 2018 - Hierarchical Taxonomy Aware Network Embedding.pdf}
}

% Journal article on variational inference for valued-graph block models.
% The journal is stored under its full name and the page range is recorded.
@article{mariadassouUncoveringLatentStructure2010,
  title = {Uncovering Latent Structure in Valued Graphs: {{A}} Variational Approach},
  shorttitle = {Uncovering Latent Structure in Valued Graphs},
  author = {Mariadassou, Mahendra and Robin, St{\'e}phane and Vacher, Corinne},
  year = 2010,
  month = jun,
  journal = {The Annals of Applied Statistics},
  volume = {4},
  number = {2},
  pages = {715--742},
  issn = {1932-6157},
  doi = {10.1214/10-AOAS361},
  urldate = {2026-01-30},
  langid = {english},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2026-01-30T14:15:30.804Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/2TY8VL7F/Mariadassou et al. - 2010 - Uncovering latent structure in valued graphs A variational approach.pdf}
}

% Mini review, Computational and Structural Biotechnology Journal, vol. 19 (2021).
% No month is recorded: the DOI suffix (2021.05.001) is inconsistent with a
% January issue date, so only the year is kept.
@article{matchadoNetworkAnalysisMethods2021a,
  title = {Network Analysis Methods for Studying Microbial Communities: {{A}} Mini Review},
  shorttitle = {Network Analysis Methods for Studying Microbial Communities},
  author = {Matchado, Monica Steffi and Lauber, Michael and Reitmeier, Sandra and Kacprowski, Tim and Baumbach, Jan and Haller, Dirk and List, Markus},
  year = 2021,
  journal = {Computational and Structural Biotechnology Journal},
  volume = {19},
  pages = {2687--2698},
  issn = {2001-0370},
  doi = {10.1016/j.csbj.2021.05.001},
  urldate = {2024-05-16},
  abstract = {Microorganisms including bacteria, fungi, viruses, protists and archaea live as communities in complex and contiguous environments. They engage in numerous inter- and intra- kingdom interactions which can be inferred from microbiome profiling data. In particular, network-based approaches have proven helpful in deciphering complex microbial interaction patterns. Here we give an overview of state-of-the-art methods to infer intra-kingdom interactions ranging from simple correlation- to complex conditional dependence-based methods. We highlight common biases encountered in microbial profiles and discuss mitigation strategies employed by different tools and their trade-off with increased computational complexity. Finally, we discuss current limitations that motivate further method development to infer inter-kingdom interactions and to robustly and comprehensively characterize microbial environments in the future.},
  keywords = {Microbial co-occurrence networks,Microbial interactions,Network analysis,Trans-kingdom interactions},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/NAEQFHE8/j.csbj.2021.05.001.pdf.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/SXJYNPP7/Matchado et al. - 2021 - Network analysis methods for studying microbial co.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/B6NZVP7Y/S2001037021001823.html}
}

% Journal article on the dynamic stochastic block model (dynsbm).
% The journal is stored under its full name; abbreviation is left to the style.
@article{matiasStatisticalClusteringTemporal2017,
  title = {Statistical {{Clustering}} of {{Temporal Networks Through}} a {{Dynamic Stochastic Block Model}}},
  author = {Matias, Catherine and Miele, Vincent},
  year = 2017,
  month = sep,
  journal = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
  volume = {79},
  number = {4},
  pages = {1119--1141},
  issn = {1369-7412},
  doi = {10.1111/rssb.12200},
  urldate = {2025-09-19},
  abstract = {Statistical node clustering in discrete time dynamic networks is an emerging field that raises many challenges. Here, we explore statistical properties and frequentist inference in a model that combines a stochastic block model for its static part with independent Markov chains for the evolution of the nodes groups through time. We model binary data as well as weighted dynamic random graphs (with discrete or continuous edges values). Our approach, motivated by the importance of controlling for label switching issues across the different time steps, focuses on detecting groups characterized by a stable within-group connectivity behaviour. We study identifiability of the model parameters and propose an inference procedure based on a variational expectation--maximization algorithm as well as a model selection criterion to select the number of groups. We carefully discuss our initialization strategy which plays an important role in the method and we compare our procedure with existing procedures on synthetic data sets. We also illustrate our approach on dynamic contact networks: one of encounters between high school students and two others on animal interactions. An implementation of the method is available as an R package called dynsbm.},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-09-19T14:13:35.038Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/DHG8XRWV/Matias et Miele - 2017 - Statistical Clustering of Temporal Networks Through a Dynamic Stochastic Block Model.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/4YPTDS8N/rssb.html;/home/louis/snap/zotero-snap/common/Zotero/storage/XMEBVPEM/rssb.html}
}

% Preprint with no journal: typed as misc (an article entry requires journal
% and year), with the year recorded and a note marking its preprint status.
@misc{mazeletUnsupervisedLearningOptimal,
  title = {Unsupervised {{Learning}} for {{Optimal Transport}} Plan Prediction between Unbalanced Graphs},
  author = {Mazelet, Sonia and Flamary, R{\'e}mi and Thirion, Bertrand},
  year = 2025,
  note = {Preprint},
  abstract = {Optimal transport between graphs, based on Gromov-Wasserstein and other extensions, is a powerful tool for comparing and aligning graph structures. However, solving the associated non-convex optimization problems is computationally expensive, which limits the scalability of these methods to large graphs. In this work, we present Unbalanced Learning of Optimal Transport (ULOT), a deep learning method that predicts optimal transport plans between two graphs. Our method is trained by minimizing the fused unbalanced Gromov-Wasserstein (FUGW) loss. We propose a novel neural architecture with cross-attention that is conditioned on the FUGW tradeoff hyperparameters. We evaluate ULOT on synthetic stochastic block model (SBM) graphs and on real cortical surface data obtained from fMRI. ULOT predicts transport plans with competitive loss up to two orders of magnitude faster than classical solvers. Furthermore, the predicted plan can be used as a warm start for classical solvers to accelerate their convergence. Finally, the predicted transport plan is fully differentiable with respect to the graph inputs and FUGW hyperparameters, enabling the optimization of functionals of the ULOT plan.},
  langid = {english},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-06-11T09:08:09.864Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/HPZEYMM9/Mazelet et al. - Unsupervised Learning for Optimal Transport plan prediction between unbalanced graphs.pdf}
}

% Journal article; the journal name is spelled out in full, in the same form as
% the other entry from this journal in the file.
@article{michalska-smithTellingEcologicalNetworks2019,
  title = {Telling Ecological Networks Apart by Their Structure: {{A}} Computational Challenge},
  shorttitle = {Telling Ecological Networks Apart by Their Structure},
  author = {{Michalska-Smith}, Matthew J. and Allesina, Stefano},
  editor = {Bollenbach, Tobias},
  year = 2019,
  month = jun,
  journal = {PLOS Computational Biology},
  volume = {15},
  number = {6},
  pages = {e1007076},
  issn = {1553-7358},
  doi = {10.1371/journal.pcbi.1007076},
  urldate = {2025-04-11},
  abstract = {Ecologists have been compiling ecological networks for over a century, detailing the interactions between species in a variety of ecosystems. To this end, they have built networks for mutualistic (e.g., pollination, seed dispersal) as well as antagonistic (e.g., herbivory, parasitism) interactions. The type of interaction being represented is believed to be reflected in the structure of the network, which would differ substantially between mutualistic and antagonistic networks. Here, we put this notion to the test by attempting to determine the type of interaction represented in a network based solely on its structure. We find that, although it is easy to separate different kinds of nonecological networks, ecological networks display much structural variation, making it difficult to distinguish between mutualistic and antagonistic interactions. We therefore frame the problem as a challenge for the community of scientists interested in computational biology and machine learning. We discuss the features a good solution to this problem should possess and the obstacles that need to be overcome to achieve this goal.},
  langid = {english},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/4UCENIQQ/Michalska-Smith et Allesina - 2019 - Telling ecological networks apart by their structure A computational challenge.pdf}
}

% Journal article, Journal of Animal Ecology 89(7), pp. 1670--1677.
% The compound surname Ramos-Jiliberto is brace-protected in the author list.
% NOTE(review): no month field, unlike most sibling article entries; confirm
% whether the issue month should be recorded.
@article{mieleCorePeripheryDynamics2020,
  title = {Core--Periphery Dynamics in a Plant--Pollinator Network},
  author = {Miele, Vincent and {Ramos-Jiliberto}, Rodrigo and V{\'a}zquez, Diego P.},
  year = 2020,
  journal = {Journal of Animal Ecology},
  volume = {89},
  number = {7},
  pages = {1670--1677},
  issn = {1365-2656},
  doi = {10.1111/1365-2656.13217},
  urldate = {2026-06-10},
  abstract = {Mutualistic networks are highly dynamic, characterized by high temporal turnover of species and interactions. Yet, we have a limited understanding of how the internal structure of these networks and the roles species play in them vary through time. We used 6 years of observation data and a novel statistical method (dynamic stochastic block models) to assess how network structure and species' structural position within the network change throughout subseasons of the flowering season and across years in a quantitative plant--pollinator network from a dryland ecosystem in Argentina. Our analyses revealed a core--periphery structure persistent through subseasons and years. Yet, species structural position as core or peripheral was highly dynamic: virtually all species that were at the core in some subseasons were also peripheral in other subseasons, while many other species always remained peripheral. Our results illuminate our understanding of the dynamics of mutualistic networks and have important implications for ecosystem management and conservation.},
  copyright = {\copyright{} 2020 The Authors. Journal of Animal Ecology published by John Wiley \& Sons Ltd on behalf of British Ecological Society},
  langid = {english},
  keywords = {/unread,core-periphery structure,mutualistic networks,plant-pollinator interactions,species role,stochastic block model,temporal dynamics},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2026-06-10T15:31:07.188Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/WAJ49JQA/Miele et al. - 2020 - Core–periphery dynamics in a plant–pollinator network.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/TPH4YZSH/1365-2656.html}
}

@article{mieleNineQuickTips2019,
  author     = {Miele, Vincent and Matias, Catherine and Robin, St{\'e}phane and Dray, St{\'e}phane},
  title      = {Nine Quick Tips for Analyzing Network Data},
  journal    = {PLOS Computational Biology},
  year       = {2019},
  month      = dec,
  volume     = {15},
  number     = {12},
  pages      = {e1007434},
  publisher  = {Public Library of Science},
  issn       = {1553-7358},
  doi        = {10.1371/journal.pcbi.1007434},
  urldate    = {2026-06-10},
  langid     = {english},
  keywords   = {/unread,Biologists,Food web structure,Genetic networks,Mathematical models,Network analysis,Neural networks,Protein interaction networks,Software tools},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2026-06-10T16:02:33.829Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/GWZ8PNZB/Miele et al. - 2019 - Nine quick tips for analyzing network data.pdf}
}

% NOTE(review): author added from the parent book (Kaufman and Rousseeuw, Finding Groups in Data); confirm against the DOI record.
@incollection{MonotheticAnalysisProgram1990,
  title = {Monothetic {{Analysis}} ({{Program MONA}})},
  booktitle = {Finding {{Groups}} in {{Data}}},
  author = {Kaufman, Leonard and Rousseeuw, Peter J.},
  year = 1990,
  pages = {280--311},
  publisher = {John Wiley \& Sons, Ltd},
  doi = {10.1002/9780470316801.ch7},
  urldate = {2024-09-13},
  abstract = {The prelims comprise: Short Description of the Method How to Use the Program MONA Examples More on the Algorithm and the Program Related Methods and References},
  chapter = {7},
  isbn = {978-0-470-31680-1},
  langid = {english},
  keywords = {binary variables,chimpanzee,dissimilarity matrix,missing measurements,monothetic analysis},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/UU46RRAJ/1990 - Monothetic Analysis (Program MONA).pdf}
}

@article{mortonScalableEstimationMicrobial2021,
  title = {Scalable Estimation of Microbial Co-Occurrence Networks with {{Variational Autoencoders}}},
  author = {Morton, James T. and Silverman, Justin and Tikhonov, Gleb and L{\"a}hdesm{\"a}ki, Harri and Bonneau, Rich},
  year = 2021,
  journal = {bioRxiv},
  publisher = {Cold Spring Harbor Laboratory},
  doi = {10.1101/2021.11.09.467939},
  url = {https://www.biorxiv.org/content/early/2021/11/11/2021.11.09.467939.full.pdf},
  abstract = {Estimating microbe-microbe interactions is critical for understanding the ecological laws governing microbial communities. Rapidly decreasing sequencing costs have promised new opportunities to estimate microbe-microbe interactions across thousands of uncultured, unknown microbes. However, typical microbiome datasets are very high dimensional and accurate estimation of microbial correlations requires tens of thousands of samples, exceeding the computational capabilities of existing methodologies. Furthermore, the vast majority of microbiome studies collect compositional metagenomics data which enforces a negative bias when computing microbe-microbe correlations. The Multinomial Logistic Normal (MLN) distribution has been shown to be effective at inferring microbe-microbe correlations, however scalable Bayesian inference of these distributions has remained elusive. Here, we show that carefully constructed Variational Autoencoders (VAEs) augmented with the Isometric Log-ratio (ILR) transform can estimate low-rank MLN distributions thousands of times faster than existing methods. These VAEs can be trained on tens of thousands of samples, enabling co-occurrence inference across tens of thousands of microbes without regularization. The latent embedding distances computed from these VAEs are competitive with existing beta-diversity methods across a variety of mouse and human microbiome classification and regression tasks, with notable improvements on longitudinal studies.},
  elocation-id = {2021.11.09.467939},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-06-30T14:17:29.518Z}
}

% NOTE(review): urldate is set but no url field is exported for this web page; add the page URL when known.
@misc{mukherjeeFoodWebDefinition2023,
  title = {Food {{Web}} -- {{Definition}}, {{Trophic Levels}}, {{Types}}, and {{Example}}},
  author = {Mukherjee, Santanu},
  year = 2023,
  month = jan,
  howpublished = {Science Facts},
  urldate = {2025-09-21},
  abstract = {What is a food web in biology. How does it work. Learn its different levels and types with an example and a simple labeled diagram.},
  langid = {american},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-09-23T11:03:33.193Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/HGFFMFJV/food-web.html}
}

% NOTE(review): urldate is set but no url field is exported for this web page; add the page URL when known.
@misc{mukherjeeTrophicLevelDefinition2023,
  title = {Trophic {{Level}} -- {{Definition}}, {{Examples}}, and {{Diagram}}},
  author = {Mukherjee, Santanu},
  year = 2023,
  month = jan,
  howpublished = {Science Facts},
  urldate = {2025-09-21},
  abstract = {What is a trophic level. How many are there. How much energy is transferred between them. How much energy is lost at each level. Learn a few examples with a diagram.},
  langid = {american},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-09-23T11:03:32.947Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/KB5XH66Q/trophic-level.html}
}

% NOTE(review): lecture notes, not a journal article; no year is recorded in the source record -- add one if known.
@unpublished{nennaLecture1Monge,
  title = {Lecture 1 {{Monge}} and {{Kantorovich}} Problems: From Primal to Dual},
  author = {Nenna, Luca},
  note = {Lecture notes},
  langid = {english},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-06-13T09:24:13.832Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/7LVQPD6D/Nenna - Lecture 1 Monge and Kantorovich problems from primal to dual.pdf}
}

% NOTE(review): lecture notes, not a journal article; no year is recorded in the source record -- add one if known.
@unpublished{nennaLecture2Entropic,
  title = {Lecture 2: {{Entropic Optimal Transport}}},
  author = {Nenna, Luca},
  note = {Lecture notes},
  langid = {english},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-06-11T16:06:28.547Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/WGFIISDB/Nenna - Lecture 2 Entropic Optimal Transport.pdf}
}

@inproceedings{neumannBipartiteStochasticBlock2018,
  author     = {Neumann, Stefan},
  title      = {Bipartite {{Stochastic Block Models}} with {{Tiny Clusters}}},
  booktitle  = {Advances in {{Neural Information Processing Systems}}},
  year       = {2018},
  volume     = {31},
  publisher  = {Curran Associates, Inc.},
  urldate    = {2026-05-20},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2026-05-20T15:43:55.470Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/NNDNV2GV/Neumann - 2018 - Bipartite Stochastic Block Models with Tiny Clusters.pdf}
}

@article{newmanFindingCommunityStructure2006,
  title = {Finding Community Structure in Networks Using the Eigenvectors of Matrices},
  author = {Newman, M. E. J.},
  year = 2006,
  month = sep,
  journal = {Physical Review E},
  volume = {74},
  number = {3},
  pages = {036104},
  issn = {1539-3755, 1550-2376},
  doi = {10.1103/PhysRevE.74.036104},
  urldate = {2024-09-09},
  copyright = {http://link.aps.org/licenses/aps-default-license},
  langid = {english},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/TJG6PCAK/Newman - 2006 - Finding community structure in networks using the eigenvectors of matrices.pdf}
}

@inproceedings{ngSpectralClusteringAnalysis2001,
  title = {On {{Spectral Clustering}}: {{Analysis}} and an Algorithm},
  shorttitle = {On {{Spectral Clustering}}},
  booktitle = {Advances in {{Neural Information Processing Systems}}},
  author = {Ng, Andrew and Jordan, Michael and Weiss, Yair},
  year = 2001,
  volume = {14},
  publisher = {MIT Press},
  urldate = {2025-10-08},
  abstract = {Despite many empirical successes of spectral clustering methods---algorithms that cluster points using eigenvectors of matrices derived from the data---there are several unresolved issues. First, there are a wide variety of algorithms that use the eigenvectors in slightly different ways. Second, many of these algorithms have no proof that they will actually compute a reasonable clustering. In this paper, we present a simple spectral clustering algorithm that can be implemented using a few lines of Matlab. Using tools from matrix perturbation theory, we analyze the algorithm, and give conditions under which it can be expected to do well. We also show surprisingly good experimental results on a number of challenging clustering problems.},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-10-08T12:47:34.666Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/SEWUWLHD/Ng et al. - 2001 - On Spectral Clustering Analysis and an algorithm.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/XRY4F47W/nips01-spectral.pdf}
}

@misc{nguenNetworkTwosampleTest2024,
  author        = {Nguen, Chung Kyong and Padilla, Oscar Hernan Madrid and Amini, Arash A.},
  title         = {Network Two-Sample Test for Block Models},
  year          = {2024},
  month         = jun,
  number        = {arXiv:2406.06014},
  publisher     = {arXiv},
  eprint        = {2406.06014},
  archiveprefix = {arXiv},
  primaryclass  = {cs, math, stat},
  doi           = {10.48550/arXiv.2406.06014},
  urldate       = {2024-06-17},
  abstract      = {We consider the two-sample testing problem for networks, where the goal is to determine whether two sets of networks originated from the same stochastic model. Assuming no vertex correspondence and allowing for different numbers of nodes, we address a fundamental network testing problem that goes beyond simple adjacency matrix comparisons. We adopt the stochastic block model (SBM) for network distributions, due to their interpretability and the potential to approximate more general models. The lack of meaningful node labels and vertex correspondence translate to a graph matching challenge when developing a test for SBMs. We introduce an efficient algorithm to match estimated network parameters, allowing us to properly combine and contrast information within and across samples, leading to a powerful test. We show that the matching algorithm, and the overall test are consistent, under mild conditions on the sparsity of the networks and the sample sizes, and derive a chi-squared asymptotic null distribution for the test. Through a mixture of theoretical insights and empirical validations, including experiments with both synthetic and real-world data, this study advances robust statistical inference for complex network data.},
  keywords      = {Computer Science - Social and Information Networks,Mathematics - Statistics Theory,Statistics - Machine Learning,Statistics - Methodology},
  file          = {/home/louis/snap/zotero-snap/common/Zotero/storage/WTJ5G2TZ/Nguen et al. - 2024 - Network two-sample test for block models.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/9RCYZK37/2406.html}
}

@article{ohlssonVariabilityEcologicalRelevance2024,
  title = {Variability and Ecological Relevance of Alternative Group Structures in Food Webs},
  author = {Ohlsson, Mikael and Ekl{\"o}f, Anna},
  year = 2024,
  month = sep,
  journal = {Ecological Informatics},
  volume = {82},
  pages = {102696},
  issn = {1574-9541},
  doi = {10.1016/j.ecoinf.2024.102696},
  urldate = {2024-10-31},
  abstract = {Broad-scale interaction patterns among species in food webs can be identified using the group model, which identifies groups of species, sharing similar sets of predators and prey from other groups. These shared relationships are relevant for the functionality of species. The group model originates from stochastic block models, meaning the obtained group structures of the same food web can differ in multiple runs. A single best partition may miss relevant information, and a consensus solution may blur complementary communities. Hence, it is highly relevant to analyze the full solution landscape while searching for the optimal partitioning of species. In particular, a narrow solution landscape would highlight the reliability of the identified groups. Here, using five empirical food webs, we analyze their respective solution landscape based on multiple group model runs of the same network. By analyzing the solution landscapes, we aim to explain the differences between solutions and what they entail, structurally and ecologically. Our results show that the overall general group structures remain intact across different iterations. While some food webs vary more, differences are commonly limited to a smaller number of groups with seemingly similar species roles. Our results suggest that while the stochastic process of the group model can generate alternate solutions for the same food web, these differences generally involve weaker distinctions of species in a small number of groups rather than a large structural turnover.},
  langid = {english},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/9SNT5T6H/Ohlsson et Eklöf - 2024 - Variability and ecological relevance of alternative group structures in food webs.pdf}
}

@article{onaDisentanglingMicrobialInteraction2025,
  author     = {O{\~n}a, Leonardo and Shreekar, Shryli K. and Kost, Christian},
  title      = {Disentangling Microbial Interaction Networks},
  journal    = {Trends in Microbiology},
  year       = {2025},
  month      = jun,
  volume     = {33},
  number     = {6},
  pages      = {619--634},
  publisher  = {Elsevier},
  issn       = {0966-842X, 1878-4380},
  doi        = {10.1016/j.tim.2025.01.013},
  pmid       = {40044528},
  urldate    = {2026-06-05},
  langid     = {english},
  keywords   = {/unread,co-occurrence network,ecological interaction,flux-balance analysis,microbial interaction network,network structure,topology},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2026-06-05T09:26:51.785Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/4HNJN4BF/Oña et al. - 2025 - Disentangling microbial interaction networks.pdf}
}

@book{ottawafield-naturalistsclubCanadianFieldnaturalist1976,
  title = {The {{Canadian Field-Naturalist}}},
  author = {{Ottawa Field-Naturalists' Club}},
  year = 1976,
  volume = {90},
  pages = {1--568},
  publisher = {Ottawa Field-Naturalists' Club},
  address = {Ottawa},
  issn = {0008-3550},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/DFN9BYBR/28045499.html}
}

% NOTE(review): author added from the parent book (Kaufman and Rousseeuw, Finding Groups in Data); confirm against the DOI record.
@incollection{PartitioningMedoidsProgram1990,
  title = {Partitioning {{Around Medoids}} ({{Program PAM}})},
  booktitle = {Finding {{Groups}} in {{Data}}},
  author = {Kaufman, Leonard and Rousseeuw, Peter J.},
  year = 1990,
  pages = {68--125},
  publisher = {John Wiley \& Sons, Ltd},
  doi = {10.1002/9780470316801.ch2},
  urldate = {2024-09-13},
  abstract = {The prelims comprise: Short Description of the Method How to Use the Program PAM Examples More on the Algorithm and the Program Related Methods and References},
  chapter = {2},
  isbn = {978-0-470-31680-1},
  langid = {english},
  keywords = {central memory,graphical representation,medoids,partitioning around medoids,representative objects},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/8MSBVNEH/1990 - Partitioning Around Medoids (Program PAM).pdf}
}

@article{pavlopoulosBipartiteGraphsSystems2018,
  author     = {Pavlopoulos, Georgios A and Kontou, Panagiota I and Pavlopoulou, Athanasia and Bouyioukos, Costas and Markou, Evripides and Bagos, Pantelis G},
  title      = {Bipartite Graphs in Systems Biology and Medicine: A Survey of Methods and Applications},
  shorttitle = {Bipartite Graphs in Systems Biology and Medicine},
  journal    = {GigaScience},
  year       = {2018},
  month      = apr,
  volume     = {7},
  number     = {4},
  pages      = {giy014},
  issn       = {2047-217X},
  doi        = {10.1093/gigascience/giy014},
  urldate    = {2023-06-15},
  abstract   = {The latest advances in high-throughput techniques during the past decade allowed the systems biology field to expand significantly. Today, the focus of biologists has shifted from the study of individual biological components to the study of complex biological systems and their dynamics at a larger scale. Through the discovery of novel bioentity relationships, researchers reveal new information about biological functions and processes. Graphs are widely used to represent bioentities such as proteins, genes, small molecules, ligands, and others such as nodes and their connections as edges within a network. In this review, special focus is given to the usability of bipartite graphs and their impact on the field of network biology and medicine. Furthermore, their topological properties and how these can be applied to certain biological case studies are discussed. Finally, available methodologies and software are presented, and useful insights on how bipartite graphs can shape the path toward the solution of challenging biological problems are provided.},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/2KJFL3SB/Pavlopoulos et al. - 2018 - Bipartite graphs in systems biology and medicine .pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/7R69YIS7/pavlopoulos2018.pdf.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/A2Y2EGPA/pavlopoulos2018.pdf.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/UK2MK5FW/pavlopoulos2018.pdf.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/GDG5ULWR/4875933.html;/home/louis/snap/zotero-snap/common/Zotero/storage/XP7G4PZF/4875933.html}
}

@article{pavlovicMultisubjectStochasticBlockmodels2020,
  title = {Multi-Subject {{Stochastic Blockmodels}} for Adaptive Analysis of Individual Differences in Human Brain Network Cluster Structure},
  author = {Pavlovi{\'c}, Dragana M. and Guillaume, Bryan R. L. and Towlson, Emma K. and Kuek, Nicole M. Y. and Afyouni, Soroosh and V{\'e}rtes, Petra E. and Yeo, B. T. Thomas and Bullmore, Edward T. and Nichols, Thomas E.},
  year = 2020,
  month = oct,
  journal = {NeuroImage},
  volume = {220},
  pages = {116611},
  issn = {1095-9572},
  doi = {10.1016/j.neuroimage.2020.116611},
  abstract = {There is considerable interest in elucidating the cluster structure of brain networks in terms of modules, blocks or clusters of similar nodes. However, it is currently challenging to handle data on multiple subjects since most of the existing methods are applicable only on a subject-by-subject basis or for analysis of an average group network. The main limitation of per-subject models is that there is no obvious way to combine the results for group comparisons, and of group-averaged models that they do not reflect the variability between subjects. Here, we propose two new extensions of the classical Stochastic Blockmodel (SBM) that use a mixture model to estimate blocks or clusters of connected nodes, combined with a regression model to capture the effects of subject-level covariates on individual differences in cluster structure. The proposed Multi-Subject Stochastic Blockmodels (MS-SBMs) can flexibly account for between-subject variability in terms of homogeneous or heterogeneous covariate effects on connectivity using subject demographics such as age or diagnostic status. Using synthetic data, representing a range of block sizes and cluster structures, we investigate the accuracy of the estimated MS-SBM parameters as well as the validity of inference procedures based on the Wald, likelihood ratio and permutation tests. We show that the proposed multi-subject SBMs recover the true cluster structure of synthetic networks more accurately and adaptively than standard methods for modular decomposition (i.e. the Fast Louvain and Newman Spectral algorithms). Permutation tests of MS-SBM parameters were more robustly valid for statistical inference and Type I error control than tests based on standard asymptotic assumptions.
Applied to analysis of multi-subject resting-state fMRI networks (13 healthy volunteers; 12 people with schizophrenia; n=268 brain regions), we show that Heterogeneous Stochastic Blockmodel (Het-SBM) identifies a range of network topologies simultaneously, including modular and core structures.},
  langid = {english},
  pmid = {32058004},
  keywords = {Brain,Community detection,Computer Simulation,Connectome,Default Mode Network,Firth estimation,Humans,Individuality,Integrated classification likelihood criterion,Likelihood ratio,Magnetic Resonance Imaging,Mixture models,Models Neurological,Models Statistical,Modularity,Multi-subject network analysis,Nerve Net,Network analysis,Permutation test,Schizophrenia,Stochastic block model,Stochastic blockmodel,Variational approximation,Wald test},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-09-19T14:04:30.797Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/ENZLGULT/Pavlović et al. - 2020 - Multi-subject Stochastic Blockmodels for adaptive analysis of individual differences in human brain.pdf}
}

@article{pavoineDissimilaritiesSpeciesDissimilarities2004,
  title = {From Dissimilarities among Species to Dissimilarities among Communities: A Double Principal Coordinate Analysis},
  shorttitle = {From Dissimilarities among Species to Dissimilarities among Communities},
  author = {Pavoine, Sandrine and Dufour, Anne-B{\'e}atrice and Chessel, Daniel},
  year = 2004,
  month = jun,
  journal = {Journal of Theoretical Biology},
  volume = {228},
  number = {4},
  pages = {523--537},
  issn = {0022-5193},
  doi = {10.1016/j.jtbi.2004.02.014},
  urldate = {2026-06-05},
  abstract = {This paper presents a new ordination method to compare several communities containing species that differ according to their taxonomic, morphological or biological features. The objective is first to find dissimilarities among communities from the knowledge about differences among their species, and second to describe these dissimilarities with regard to the feature diversity within communities. In 1986, Rao initiated a general framework for analysing the extent of the diversity. He defined a diversity coefficient called quadratic entropy and a dissimilarity coefficient and proposed a decomposition of this diversity coefficient in a way similar to ANOVA. Furthermore, Gower and Legendre (1986) built a weighted principal coordinate analysis. Using the previous context, we propose a new method called the double principal coordinate analysis (DPCoA) to analyse the relation between two kinds of data. The first contains differences among species (dissimilarity matrix); the second the species distribution among communities (abundance or presence/absence matrix). A multidimensional space assembling the species points and the community points is built. The species points define the original differences between species and the community points define the deduced differences between communities. Furthermore, this multidimensional space is linked with the diversity decomposition into between-community and within-community diversities. One looks for axes that provide a graphical ordination of the communities and project the species onto them. An illustration is proposed comparing bird communities which live in different areas under mediterranean bioclimates. Compared to some existing methods, the double principal coordinate analysis can provide a typology of communities taking account of an abundance matrix and can include dissimilarities among species. Finally, we show that such an approach generalizes some of these methods and allows us to develop new analyses.},
  copyright = {https://www.elsevier.com/tdm/userlicense/1.0/},
  langid = {english},
  keywords = {/unread},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2026-06-05T12:58:07.242Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/QB54QY49/Pavoine et al. - 2004 - From dissimilarities among species to dissimilarities among communities a double principal coordina.pdf}
}

@misc{payneFiniteMixturesMultivariate2023,
  author        = {Payne, Andrea and Silva, Anjali and Rothstein, Steven J. and McNicholas, Paul D. and Subedi, Sanjeena},
  title         = {Finite {{Mixtures}} of {{Multivariate Poisson-Log Normal Factor Analyzers}} for {{Clustering Count Data}}},
  year          = {2023},
  month         = nov,
  number        = {arXiv:2311.07762},
  publisher     = {arXiv},
  eprint        = {2311.07762},
  archiveprefix = {arXiv},
  primaryclass  = {stat},
  doi           = {10.48550/arXiv.2311.07762},
  urldate       = {2025-07-02},
  abstract      = {A mixture of multivariate Poisson-log normal factor analyzers is introduced by imposing constraints on the covariance matrix, which resulted in flexible models for clustering purposes. In particular, a class of eight parsimonious mixture models based on the mixtures of factor analyzers model are introduced. Variational Gaussian approximation is used for parameter estimation, and information criteria are used for model selection. The proposed models are explored in the context of clustering discrete data arising from RNA sequencing studies. Using real and simulated data, the models are shown to give favourable clustering performance. The GitHub R package for this work is available at https://github.com/anjalisilva/mixMPLNFA and is released under the open-source MIT license.},
  keywords      = {Statistics - Computation,Statistics - Machine Learning,Statistics - Methodology},
  annotation    = {Read\_Status: New\\
Read\_Status\_Date: 2025-07-02T09:31:47.579Z},
  file          = {/home/louis/snap/zotero-snap/common/Zotero/storage/BXVPEIDD/Payne et al. - 2023 - Finite Mixtures of Multivariate Poisson-Log Normal Factor Analyzers for Clustering Count Data.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/L5DAS5C2/2311.html}
}

@misc{peixotoBayesianStochasticBlockmodeling2023,
  author        = {Peixoto, Tiago P.},
  title         = {Bayesian Stochastic Blockmodeling},
  year          = {2023},
  month         = mar,
  eprint        = {1705.10225},
  archiveprefix = {arXiv},
  primaryclass  = {stat},
  doi           = {10.1002/9781119483298.ch11},
  urldate       = {2025-01-26},
  abstract      = {This chapter provides a self-contained introduction to the use of Bayesian inference to extract large-scale modular structures from network data, based on the stochastic blockmodel (SBM), as well as its degree-corrected and overlapping generalizations. We focus on nonparametric formulations that allow their inference in a manner that prevents overfitting, and enables model selection. We discuss aspects of the choice of priors, in particular how to avoid underfitting via increased Bayesian hierarchies, and we contrast the task of sampling network partitions from the posterior distribution with finding the single point estimate that maximizes it, while describing efficient algorithms to perform either one. We also show how inferring the SBM can be used to predict missing and spurious links, and shed light on the fundamental limitations of the detectability of modular structures in networks.},
  langid        = {english},
  keywords      = {a lire,Condensed Matter - Statistical Mechanics,Physics - Data Analysis Statistics and Probability,Statistics - Machine Learning},
  file          = {/home/louis/snap/zotero-snap/common/Zotero/storage/L34VRKGF/Peixoto - 2023 - Bayesian stochastic blockmodeling.pdf}
}

@article{peixotoEfficientMonteCarlo2014,
  title = {Efficient {{Monte Carlo}} and Greedy Heuristic for the Inference of Stochastic Block Models},
  author = {Peixoto, Tiago P.},
  year = 2014,
  month = jan,
  journal = {Physical Review E},
  volume = {89},
  number = {1},
  pages = {012804},
  publisher = {American Physical Society},
  doi = {10.1103/PhysRevE.89.012804},
  urldate = {2025-01-26},
  abstract = {We present an efficient algorithm for the inference of stochastic block models in large networks. The algorithm can be used as an optimized Markov chain Monte Carlo (MCMC) method, with a fast mixing time and a much reduced susceptibility to getting trapped in metastable states, or as a greedy agglomerative heuristic, with an almost linear {$O(N \ln^2 N)$} complexity, where {$N$} is the number of nodes in the network, independent of the number of blocks being inferred. We show that the heuristic is capable of delivering results which are indistinguishable from the more exact and numerically expensive MCMC method in many artificial and empirical networks, despite being much faster. The method is entirely unbiased towards any specific mixing pattern, and in particular it does not favor assortative community structures.},
  keywords = {a lire},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/9C6TE4FS/Peixoto - 2014 - Efficient Monte Carlo and greedy heuristic for the inference of stochastic block models.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/Q4LRS4GA/PhysRevE.89.html}
}

@article{peixotoHierarchicalBlockStructures2014,
  title = {Hierarchical {{Block Structures}} and {{High-Resolution Model Selection}} in {{Large Networks}}},
  author = {Peixoto, Tiago P.},
  year = 2014,
  month = mar,
  journal = {Physical Review X},
  volume = {4},
  number = {1},
  pages = {011047},
  issn = {2160-3308},
  doi = {10.1103/PhysRevX.4.011047},
  urldate = {2025-09-26},
  copyright = {http://creativecommons.org/licenses/by/3.0/},
  langid = {english},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-09-26T08:27:38.586Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/R58ZQK8J/Peixoto - 2014 - Hierarchical Block Structures and High-Resolution Model Selection in Large Networks.pdf}
}

@misc{peyreComputationalOptimalTransport2020,
  author        = {Peyr{\'e}, Gabriel and Cuturi, Marco},
  title         = {Computational {{Optimal Transport}}},
  year          = {2020},
  month         = mar,
  number        = {arXiv:1803.00567},
  publisher     = {arXiv},
  eprint        = {1803.00567},
  archiveprefix = {arXiv},
  primaryclass  = {stat},
  doi           = {10.48550/arXiv.1803.00567},
  urldate       = {2024-05-14},
  abstract      = {Optimal transport (OT) theory can be informally described using the words of the French mathematician Gaspard Monge (1746-1818): A worker with a shovel in hand has to move a large pile of sand lying on a construction site. The goal of the worker is to erect with all that sand a target pile with a prescribed shape (for example, that of a giant sand castle). Naturally, the worker wishes to minimize her total effort, quantified for instance as the total distance or time spent carrying shovelfuls of sand. Mathematicians interested in OT cast that problem as that of comparing two probability distributions, two different piles of sand of the same volume. They consider all of the many possible ways to morph, transport or reshape the first pile into the second, and associate a "global" cost to every such transport, using the "local" consideration of how much it costs to move a grain of sand from one place to another. Recent years have witnessed the spread of OT in several fields, thanks to the emergence of approximate solvers that can scale to sizes and dimensions that are relevant to data sciences. Thanks to this newfound scalability, OT is being increasingly used to unlock various problems in imaging sciences (such as color or texture processing), computer vision and graphics (for shape manipulation) or machine learning (for regression, classification and density fitting). This short book reviews OT with a bias toward numerical methods and their applications in data sciences, and sheds lights on the theoretical properties of OT that make it particularly useful for some of these applications.},
  keywords      = {Statistics - Machine Learning},
  file          = {/home/louis/snap/zotero-snap/common/Zotero/storage/64Q9WE2Z/Peyré et Cuturi - 2020 - Computational Optimal Transport.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/3GAQMNL8/1803.html}
}

@inproceedings{peyreGromovWassersteinAveragingKernel,
  title = {Gromov-{{Wasserstein Averaging}} of {{Kernel}} and {{Distance Matrices}}},
  booktitle = {Proceedings of the 33rd {{International Conference}} on {{Machine Learning}}},
  author = {Peyr{\'e}, Gabriel and Cuturi, Marco and Solomon, Justin},
  year = 2016,
  month = jun,
  pages = {2664--2672},
  publisher = {PMLR},
  abstract = {This paper presents a new technique for computing the barycenter of a set of distance or kernel matrices. These matrices, which define the interrelationships between points sampled from individual domains, are not required to have the same size or to be in row-by-row correspondence. We compare these matrices using the softassign criterion, which measures the minimum distortion induced by a probabilistic map from the rows of one similarity matrix to the rows of another; this criterion amounts to a regularized version of the Gromov-Wasserstein (GW) distance between metric-measure spaces. The barycenter is then defined as a Fr{\'e}chet mean of the input matrices with respect to this criterion, minimizing a weighted sum of softassign values. We provide a fast iterative algorithm for the resulting nonconvex optimization problem, built upon state-of-the-art tools for regularized optimal transportation. We demonstrate its application to the computation of shape barycenters and to the prediction of energy levels from molecular configurations in quantum chemistry.},
  langid = {english},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-06-11T16:01:10.274Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/NZICAQGD/Peyré et al. - Gromov-Wasserstein Averaging of Kernel and Distance Matrices.pdf}
}

@article{pichonTellingMutualisticAntagonistic,
  title = {Telling Mutualistic and Antagonistic Ecological Networks Apart by Learning Their Multiscale Structure},
  author = {Pichon, Beno{\^i}t and Le Goff, R{\'e}my and Morlon, H{\'e}l{\`e}ne and {Perez-Lamarque}, Beno{\^i}t},
  year = 2024,
  journal = {Methods in Ecology and Evolution},
  issn = {2041-210X},
  doi = {10.1111/2041-210X.14328},
  urldate = {2024-05-27},
  abstract = {Characterizing and understanding the processes that shape the structure of ecological networks, which represent who interacts with whom in a community, has many implications in ecology, evolutionary biology and conservation. A highly debated question is whether and how the structure of a bipartite ecological network differs between antagonistic (e.g. herbivory) and mutualistic (e.g. pollination) interaction types. Here, we tackle this question by using a multiscale characterization of network structure, machine learning tools, and a large database of empirical and simulated bipartite networks. Contrary to previous studies focusing on global structural metrics, such as nestedness and modularity, which concluded that antagonistic and mutualistic networks cannot be told apart from only their structure, we find that they can be told apart by combining a meso-scale characterization of their structure and supervised machine learning. Motif frequencies appear particularly informative, with an over-representation of densely connected motifs in antagonistic networks and of motifs with asymmetrical specialization in mutualistic networks. These structural properties can be used to predict the type of interaction with relatively good confidence. Beyond this classical mutualism/antagonism dichotomy, we also find significant structural uniqueness linked to specific ecologies (e.g. pollination, parasitism). Our results clarify structural differences between antagonistic and mutualistic networks and suggest the investigation of the structural uniqueness of specific ecologies as a promising approach for characterizing interactions beyond the coarse antagonistic/mutualistic dichotomy.},
  copyright = {\copyright{} 2024 The Authors. Methods in Ecology and Evolution published by John Wiley \& Sons Ltd on behalf of British Ecological Society.},
  langid = {english},
  keywords = {ecological interactions,interaction classification,machine learning,motif frequency,network structure},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/HDXEFQ6I/Pichon et al. - Telling mutualistic and antagonistic ecological ne.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/YPGSCJYI/Pichon et al. - 2024 - Telling mutualistic and antagonistic ecological ne.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/TIZFQRBG/2041-210X.html;/home/louis/snap/zotero-snap/common/Zotero/storage/UUBAT5WF/2041-210X.html}
}

@unpublished{priamNegativeBinomialLatent2024,
  title = {Negative Binomial Latent Block Model with Generalized Constraints},
  author = {Priam, Rodolphe},
  year = 2024,
  month = nov,
  note = {Unpublished manuscript},
  urldate = {2025-10-29},
  abstract = {Constrained latent block models (LBM) are proposed for contingency matrices herein. Several discrete distributions related to the usual Poisson one are compared for modeling the blocks in a co-clustering and a reduction of the rows and columns.},
  keywords = {expectation maximization,latent block model,negative binomial distribution,regression},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-10-29T15:37:28.803Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/S8LUVP3F/Priam - 2024 - Negative binomial latent block model with generalized constraints.pdf}
}

@article{pudloReliableABCModel2016a,
  title = {Reliable {{ABC}} Model Choice via Random Forests},
  author = {Pudlo, Pierre and Marin, Jean-Michel and Estoup, Arnaud and Cornuet, Jean-Marie and Gautier, Mathieu and Robert, Christian P.},
  year = 2016,
  month = mar,
  journal = {Bioinformatics},
  volume = {32},
  number = {6},
  pages = {859--866},
  issn = {1367-4803},
  doi = {10.1093/bioinformatics/btv684},
  urldate = {2026-04-07},
  abstract = {Motivation: Approximate Bayesian computation (ABC) methods provide an elaborate approach to Bayesian inference on complex models, including model choice. Both theoretical arguments and simulation experiments indicate, however, that model posterior probabilities may be poorly evaluated by standard ABC techniques. Results: We propose a novel approach based on a machine learning tool named random forests (RF) to conduct selection among the highly complex models covered by ABC algorithms. We thus modify the way Bayesian model selection is both understood and operated, in that we rephrase the inferential goal as a classification problem, first predicting the model that best fits the data with RF and postponing the approximation of the posterior probability of the selected model for a second stage also relying on RF. Compared with earlier implementations of ABC model choice, the ABC RF approach offers several potential improvements: (i) it often has a larger discriminative power among the competing models, (ii) it is more robust against the number and choice of statistics summarizing the data, (iii) the computing effort is drastically reduced (with a gain in computation efficiency of at least 50) and (iv) it includes an approximation of the posterior probability of the selected model. The call to RF will undoubtedly extend the range of size of datasets and complexity of models that ABC can handle. We illustrate the power of this novel methodology by analyzing controlled experiments as well as genuine population genetics datasets. Availability and implementation: The proposed methodology is implemented in the R package abcrf available on the CRAN. Contact: jean-michel.marin@umontpellier.fr. Supplementary information: Supplementary data are available at Bioinformatics online.},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2026-04-07T13:23:20.640Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/S2AWU3RD/Pudlo et al. - 2016 - Reliable ABC model choice via random forests.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/Q6XY6YCK/btv684.html}
}

@article{puTreeEnhancedLatentSpace2025,
  title = {Tree-{{Enhanced Latent Space Models}} for {{Two-Mode Networks}}},
  author = {Pu, Dan and Fan, Xinyan and Fang, Kuangnan},
  year = 2025,
  month = jun,
  journal = {Journal of Computational and Graphical Statistics},
  pages = {1--9},
  publisher = {Taylor \& Francis},
  issn = {1061-8600},
  doi = {10.1080/10618600.2025.2527295},
  urldate = {2025-09-23},
  abstract = {Latent space models have garnered significant attention in the analysis of two-mode networks. In numerous applications, auxiliary information in the form of a hierarchical tree structure, which elucidates the interrelationships between nodes and provides extensive insights into connectivity patterns, can be easily obtained. To harness the potential of such tree-structured information, we introduce an innovative tree-enhanced latent space model (TLSM) for two-mode networks. In this framework, each node is characterized by a latent embedding vector, reparameterized as the aggregate of intermediate vectors corresponding to nodes within the tree structure. By optimizing the log-likelihood function augmented with a tree-based regularization term, the proposed model facilitates the simultaneous estimation of embedding vectors and the derivation of interpretable community structures. We have developed an efficient Alternating Direction Method of Multipliers (ADMM) algorithm to solve the resulting optimization problem. Theoretical analysis establishes the consistency of the proposed estimator under some mild conditions. Furthermore, comprehensive simulation studies and empirical applications on the Amazon review dataset substantiate the efficacy and practical relevance of the proposed model. Supplementary materials for this article are available online.},
  keywords = {Latent space model,Tree-structured information,Two-mode network},
  annotation = {Read\_Status: Read\\
Read\_Status\_Date: 2025-09-24T13:31:51.261Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/9DVZDMA7/Appendix.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/NK5GZXTL/Pu et al. - Tree-Enhanced Latent Space Models for Two-Mode Networks.pdf}
}

@misc{quericBridgingMaximumLikelihood2026,
  title = {Bridging {{Maximum Likelihood}} and {{Optimal Transport}} for {{Efficient Inference}} and {{Model Selection}} in {{Stochastic Block Models}}},
  author = {Queric, Simon and {Vincent-Cuaz}, C{\'e}dric and Bouveyron, Charles and Corneli, Marco},
  year = 2026,
  eprint = {2605.28488},
  publisher = {arXiv},
  doi = {10.48550/arXiv.2605.28488},
  urldate = {2026-06-03},
  abstract = {We study inference in stochastic block models (SBMs) through the lens of optimal transport (OT). We first establish that maximum likelihood variational inference (MLVI) can be interpreted as a semi-relaxed Gromov-Wasserstein (srGW) projection with entropic regularization. While this formulation yields accurate clustering, the entropic regularization prevents transport plans to be sparse, hindering intrinsic model selection. Consequently, we investigate unregularized srGW estimators, and prove that they consistently recover both the SBM connectivity matrix and latent cluster assignments in the asymptotic regime. However, this asymptotic property does not translate into reliable model selection in finite samples, and calls for additional mechanisms to promote sparsity in the inferred cluster proportions. We empirically show that such a regularized formulation yields estimators that simultaneously recover model parameters and select the number of clusters in a single optimization problem, thereby avoiding costly grid search or heuristic model selection procedures.},
  copyright = {Creative Commons Attribution 4.0 International},
  archiveprefix = {arXiv},
  keywords = {/unread,FOS: Computer and information sciences,FOS: Mathematics,Machine Learning (cs.LG),Machine Learning (stat.ML),Statistics Theory (math.ST)},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2026-06-03T07:59:33.078Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/M2S6A2E2/Queric et al. - 2026 - Bridging Maximum Likelihood and Optimal Transport for Efficient Inference and Model Selection in Sto.pdf}
}

@article{ramos-jilibertoTopologicalChangeAndean2010,
  author     = {{Ramos-Jiliberto}, Rodrigo and Dom{\'i}nguez, Daniela and Espinoza, Claudia and L{\'o}pez, Gioconda and Valdovinos, Fernanda S. and Bustamante, Ramiro O. and Medel, Rodrigo},
  title      = {Topological Change of {{Andean}} Plant--Pollinator Networks along an Altitudinal Gradient},
  journal    = {Ecological Complexity},
  year       = 2010,
  month      = mar,
  volume     = {7},
  number     = {1},
  pages      = {86--90},
  issn       = {1476-945X},
  doi        = {10.1016/j.ecocom.2009.06.001},
  urldate    = {2023-06-15},
  langid     = {english},
  keywords   = {bipartite-example,Chile,Complexity,Degree distribution,Modularity,Mutualistic networks,Nestedness,Power law},
  abstract   = {Pollination interaction networks exhibit structural regularities across a wide range of natural environments. Long-tailed degree distribution, nestedness, and modularity are the most prevalent topological patterns found in most bipartite networks analyzed up to day. In this work we evaluate the variation of these topological properties along an altitudinal gradient. To this end, we examined four plant--pollinator networks from the Chilean Andes at 33{$^\circ$}S, in range from 1800 to 3600m elevation. Our results indicate that network topology is strongly and systematically affected by elevation. At increasing altitude, the number of potential visitors per plant decreased, and species' degree distributions are closer to random expectations. On the other hand, the nested structure of mutualistic interactions systematically decreased with elevation, and network modularity was significantly higher than random expectations over the entire altitudinal range. In addition, at increasing elevations the pollination networks were organized in fewer and more strongly connected modules. Our results suggest that the severe abiotic conditions found at increased elevations translate into less organized pollination networks.},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/ATY3ZP2X/Ramos-Jiliberto et al. - 2010 - Topological change of Andean plant–pollinator netw.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/HPBGUP65/ramos-jiliberto2010.pdf.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/I33MZQQ7/ramos-jiliberto2010.pdf.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/YJX8XBNW/S1476945X09000622.html}
}

@article{ratnaInclusiveAnalysisPerformance2025,
  title = {An Inclusive Analysis for Performance and Efficiency of Graph Neural Network Models for Node Classification},
  author = {Ratna, S. and Singh, Sukhdeep and Sharma, Anuj},
  year = 2025,
  month = may,
  journal = {Computer Science Review},
  volume = {56},
  pages = {100722},
  issn = {1574-0137},
  doi = {10.1016/j.cosrev.2024.100722},
  urldate = {2025-01-15},
  langid = {english},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/NHDSYV48/Ratna et al. - 2025 - An inclusive analysis for performance and efficiency of graph neural network models for node classif.pdf}
}

@article{raynalABCRandomForests2019a,
  title = {{{ABC}} Random Forests for {{Bayesian}} Parameter Inference},
  author = {Raynal, Louis and Marin, Jean-Michel and Pudlo, Pierre and Ribatet, Mathieu and Robert, Christian P. and Estoup, Arnaud},
  year = 2019,
  month = may,
  journal = {Bioinformatics},
  volume = {35},
  number = {10},
  pages = {1720--1728},
  issn = {1367-4803},
  doi = {10.1093/bioinformatics/bty867},
  urldate = {2026-04-07},
  abstract = {Approximate Bayesian computation (ABC) has grown into a standard methodology that manages Bayesian inference for models associated with intractable likelihood functions. Most ABC implementations require the preliminary selection of a vector of informative statistics summarizing raw data. Furthermore, in almost all existing implementations, the tolerance level that separates acceptance from rejection of simulated parameter values needs to be calibrated. We propose to conduct likelihood-free Bayesian inferences about parameters with no prior selection of the relevant components of the summary statistics and bypassing the derivation of the associated tolerance level. The approach relies on the random forest (RF) methodology of Breiman (2001) applied in a (non-parametric) regression setting. We advocate the derivation of a new RF for each component of the parameter vector of interest. When compared with earlier ABC solutions, this method offers significant gains in terms of robustness to the choice of the summary statistics, does not depend on any type of tolerance level, and is a good trade-off in term of quality of point estimator precision and credible interval estimations for a given computing time. We illustrate the performance of our methodological proposal and compare it with earlier ABC methods on a Normal toy example and a population genetics example dealing with human population evolution. All methods designed here have been incorporated in the R package abcrf (version 1.7.1) available on CRAN. Supplementary data are available at Bioinformatics online.},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2026-04-07T13:22:59.866Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/LPCYTD2V/Raynal et al. - 2019 - ABC random forests for Bayesian parameter inference.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/78EL4I9A/bty867.html}
}

@article{rebafkaModelbasedClusteringMultiple2023a,
  title = {Model-Based Clustering of Multiple Networks with a Hierarchical Algorithm},
  author = {Rebafka, Tabea},
  year = 2023,
  month = nov,
  journal = {Statistics and Computing},
  volume = {34},
  number = {1},
  pages = {32},
  issn = {1573-1375},
  doi = {10.1007/s11222-023-10329-w},
  urldate = {2025-12-01},
  abstract = {The paper tackles the problem of clustering multiple networks, directed or not, that do not share the same set of vertices, into groups of networks with similar topology. A statistical model-based approach based on a finite mixture of stochastic block models is proposed. A clustering is obtained by maximizing the integrated classification likelihood criterion. This is done by a hierarchical agglomerative algorithm, that starts from singleton clusters and successively merges clusters of networks. As such, a sequence of nested clusterings is computed that can be represented by a dendrogram providing valuable insights on the collection of networks. Using a Bayesian framework, model selection is performed in an automated way since the algorithm stops when the best number of clusters is attained. The algorithm is computationally efficient, when carefully implemented. The aggregation of clusters requires a means to overcome the label-switching problem of the stochastic block model and to match the block labels of the networks. To address this problem, a new tool is proposed based on a comparison of the graphons of the associated stochastic block models. The clustering approach is assessed on synthetic data. An application to a set of ecological networks illustrates the interpretability of the obtained results.},
  langid = {english},
  keywords = {Agglomerative algorithm,Graph clustering,Graphon distance,Integrated classification likelihood,Multiple networks,Stochastic block model},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-12-01T12:41:21.757Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/LKMJL667/Rebafka - 2023 - Model-based clustering of multiple networks with a hierarchical algorithm.pdf}
}

@incollection{References1990,
  title = {References},
  booktitle = {Finding {{Groups}} in {{Data}}},
  author = {Kaufman, Leonard and Rousseeuw, Peter J.},
  year = 1990,
  pages = {320--331},
  publisher = {John Wiley \& Sons, Ltd},
  doi = {10.1002/9780470316801.refs},
  urldate = {2024-09-13},
  isbn = {978-0-470-31680-1},
  langid = {english},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/YKP9NU4L/1990 - References.pdf}
}

@article{reidAssessingSinglelocusCRISPR2022,
  title = {Assessing Single-Locus {{CRISPR}}/{{Cas9-based}} Gene Drive Variants in the Mosquito {{Aedes}} Aegypti via Single-Generation Crosses and Modeling},
  author = {Reid, William and Williams, Adeline E and {Sanchez-Vargas}, Irma and Lin, Jingyi and Juncu, Rucsanda and Olson, Ken E and Franz, Alexander W E},
  year = 2022,
  month = oct,
  journal = {G3: Genes, Genomes, Genetics},
  volume = {12},
  number = {12},
  pages = {jkac280},
  issn = {2160-1836},
  doi = {10.1093/g3journal/jkac280},
  urldate = {2024-09-04},
  abstract = {The yellow fever mosquito Aedes aegypti is a major vector of arthropod-borne viruses, including dengue, chikungunya, and Zika viruses. A novel approach to mitigate arboviral infections is to generate mosquitoes refractory to infection by overexpressing antiviral effector molecules. Such an approach requires a mechanism to spread these antiviral effectors through a population, for example, by using CRISPR/Cas9-based gene drive systems. Critical to the design of a single-locus autonomous gene drive is that the selected genomic locus is amenable to both gene drive and appropriate expression of the antiviral effector. In our study, we used reverse engineering to target 2 intergenic genomic loci, which had previously shown to be highly permissive for antiviral effector gene expression, and we further investigated the use of 3 promoters (nanos, {$\beta$}2-tubulin, or zpg) for Cas9 expression. We then quantified the accrual of insertions or deletions (indels) after single-generation crossings, measured maternal effects, and assessed fitness costs associated with various transgenic lines to model the rate of gene drive fixation. Overall, MGDrivE modeling suggested that when an autonomous gene drive is placed into an intergenic locus, the gene drive system will eventually be blocked by the accrual of gene drive blocking resistance alleles and ultimately be lost in the population. Moreover, while genomic locus and promoter selection were critically important for the initial establishment of the autonomous gene drive, it was the fitness of the gene drive line that most strongly influenced the persistence of the gene drive in the simulated population. As such, we propose that when autonomous CRISPR/Cas9-based gene drive systems are anchored in an intergenic locus, they temporarily result in a strong population replacement effect, but as gene drive-blocking indels accrue, the gene drive becomes exhausted due to the fixation of CRISPR resistance alleles.},
  pmcid = {PMC9713460},
  pmid = {36250791},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/JVAA8YCV/Reid et al. - 2022 - Assessing single-locus CRISPRCas9-based gene drive variants in the mosquito Aedes aegypti via singl.pdf}
}

@article{rivera-hutinelEffectsSamplingCompleteness2012,
  author     = {{Rivera-Hutinel}, A. and Bustamante, R. O. and Mar{\'i}n, V. H. and Medel, R.},
  title      = {Effects of Sampling Completeness on the Structure of Plant--Pollinator Networks},
  journal    = {Ecology},
  year       = 2012,
  volume     = {93},
  number     = {7},
  pages      = {1593--1603},
  issn       = {1939-9170},
  doi        = {10.1890/11-1803.1},
  urldate    = {2025-09-18},
  langid     = {english},
  copyright  = {\copyright{} 2012 by the Ecological Society of America},
  keywords   = {accumulation curves,Chile,Clench model,ecological networks,Los Ruiles National Reserve,network size,plant-pollinator network metrics,sampling completeness,sampling effort,sampling evenness},
  abstract   = {Plant--animal interaction networks provide important information on community organization. One of the most critical assumptions of network analysis is that the observed interaction patterns constitute an adequate sample of the set of interactions present in plant--animal communities. In spite of its importance, few studies have evaluated this assumption, and in consequence, there is no consensus on the sensitivity of network metrics to sampling methodological shortcomings. In this study we examined how variation in sampling completeness influences the estimation of six network metrics frequently used in the literature (connectance, nestedness, modularity, robustness to species loss, path length, and centralization). We analyzed data of 186 flowering plants and 336 pollinator species in 10 networks from a forest-fragmented system in central Chile. Using species-based accumulation curves, we estimated the deviation of network metrics in undersampled communities with respect to exhaustively sampled communities and the effect of network size and sampling evenness on network metrics. Our results indicate that: (1) most metrics were affected by sampling completeness but differed in their sensitivity to sampling effort; (2) nestedness, modularity, and robustness to species loss were less influenced by insufficient sampling than connectance, path length, and centralization; (3) robustness was mildly influenced by sampling evenness. These results caution studies that summarize information from databases with high, or unknown, heterogeneity in sampling effort per species and should stimulate researchers to report sampling intensity to standardize its effects in the search for broad patterns in plant--pollinator networks.},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-09-18T15:47:50.369Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/PJ7NWTIV/Rivera-Hutinel et al. - 2012 - Effects of sampling completeness on the structure of plant–pollinator networks.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/37GFBFZH/11-1803.html}
}

@misc{robertWhyApproximateBayesiana,
  title = {Why Approximate {{Bayesian}} Computational ({{ABC}}) Methods Cannot Handle Model Choice Problems},
  author = {Robert, Christian P. and Marin, Jean-Michel and Pillai, Natesh S.},
  year = 2011,
  eprint = {1101.5091},
  publisher = {arXiv},
  abstract = {Approximate Bayesian computation (ABC), also known as likelihood-free methods, have become a favourite tool for the analysis of complex stochastic models, primarily in population genetics but also in financial analyses. We advocated in Grelaud et al. (2009) the use of ABC for Bayesian model choice in the specific case of Gibbs random fields (GRF), relying on a sufficiency property mainly enjoyed by GRFs to show that the approach was legitimate. Despite having previously suggested the use of ABC for model choice in a wider range of models in the DIY ABC software (Cornuet et al., 2008), we present theoretical evidence that the general use of ABC for model choice is fraught with danger in the sense that no amount of computation, however large, can guarantee a proper approximation of the posterior probabilities of the models under comparison.},
  archiveprefix = {arXiv},
  langid = {english},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2026-04-07T13:23:25.869Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/LDPLZGIJ/Robert et al. - 2011 - Why approximate Bayesian computational (ABC) methods cannot handle model choice problems.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/ZU4F5JG4/Robert et al. - Why approximate Bayesian computational (ABC) methods cannot handle model choice problems.pdf}
}

@article{rubin-delanchyStatisticalInterpretationSpectral2022,
  author     = {{Rubin-Delanchy}, Patrick and Cape, Joshua and Tang, Minh and Priebe, Carey E.},
  title      = {A {{Statistical Interpretation}} of {{Spectral Embedding}}: {{The Generalised Random Dot Product Graph}}},
  shorttitle = {A {{Statistical Interpretation}} of {{Spectral Embedding}}},
  journal    = {Journal of the Royal Statistical Society Series B: Statistical Methodology},
  year       = 2022,
  month      = sep,
  volume     = {84},
  number     = {4},
  pages      = {1446--1473},
  issn       = {1369-7412},
  doi        = {10.1111/rssb.12509},
  urldate    = {2025-07-09},
  abstract   = {Spectral embedding is a procedure which can be used to obtain vector representations of the nodes of a graph. This paper proposes a generalisation of the latent position network model known as the random dot product graph, to allow interpretation of those vector representations as latent position estimates. The generalisation is needed to model heterophilic connectivity (e.g. `opposites attract') and to cope with negative eigenvalues more generally. We show that, whether the adjacency or normalised Laplacian matrix is used, spectral embedding produces uniformly consistent latent position estimates with asymptotically Gaussian error (up to identifiability). The standard and mixed membership stochastic block models are special cases in which the latent positions take only K distinct vector values, representing communities, or live in the (K - 1)-simplex with those vertices respectively. Under the stochastic block model, our theory suggests spectral clustering using a Gaussian mixture model (rather than K-means) and, under mixed membership, fitting the minimum volume enclosing simplex, existing recommendations previously only supported under non-negative-definite assumptions. Empirical improvements in link prediction (over the random dot product graph), and the potential to uncover richer latent structure (than posited under the standard or mixed membership stochastic block models) are demonstrated in a cyber-security example.},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-07-09T14:21:55.886Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/GFNJXK3T/Rubin-Delanchy et al. - 2022 - A Statistical Interpretation of Spectral Embedding The Generalised Random Dot Product Graph.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/548W89BL/7073272.html}
}

@article{sanchez-lengelingGentleIntroductionGraph2021,
  author     = {{Sanchez-Lengeling}, Benjamin and Reif, Emily and Pearce, Adam and Wiltschko, Alexander B.},
  title      = {A {{Gentle Introduction}} to {{Graph Neural Networks}}},
  journal    = {Distill},
  year       = 2021,
  month      = sep,
  volume     = {6},
  number     = {9},
  pages      = {e33},
  issn       = {2476-0757},
  doi        = {10.23915/distill.00033},
  urldate    = {2024-05-15},
  langid     = {english},
  abstract   = {What components are needed for building learning algorithms that leverage the structure and properties of graphs?},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/4A3V4EFV/gnn-intro.html}
}

@article{sanderWhatCanInteraction2015,
  author     = {Sander, Elizabeth L. and Wootton, J. Timothy and Allesina, Stefano},
  title      = {What {{Can Interaction Webs Tell Us About Species Roles}}?},
  journal    = {PLoS Computational Biology},
  year       = 2015,
  month      = jul,
  volume     = {11},
  number     = {7},
  pages      = {e1004330},
  doi        = {10.1371/journal.pcbi.1004330},
  pmid       = {26197151},
  urldate    = {2024-11-04},
  langid     = {english},
  abstract   = {The group model is a useful tool to understand broad-scale patterns of interaction in a network, but it has previously been limited in use to food webs, which contain only predator-prey interactions. Natural populations interact with each other in a ...},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/ZMBEJAED/Sander et al. - 2015 - What Can Interaction Webs Tell Us About Species Roles.pdf}
}

@article{schwarzEstimatingDimensionModel1978,
  author     = {Schwarz, Gideon},
  title      = {Estimating the {{Dimension}} of a {{Model}}},
  journal    = {The Annals of Statistics},
  year       = 1978,
  month      = mar,
  volume     = {6},
  number     = {2},
  pages      = {461--464},
  publisher  = {Institute of Mathematical Statistics},
  issn       = {0090-5364, 2168-8966},
  doi        = {10.1214/aos/1176344136},
  urldate    = {2025-01-29},
  keywords   = {62F99,62J99,Akaike information criterion,asymptotics,dimension},
  abstract   = {The problem of selecting one of a number of models of different dimensions is treated by finding its Bayes solution, and evaluating the leading terms of its asymptotic expansion. These terms are a valid large-sample criterion beyond the Bayesian context, since they do not depend on the a priori distribution.},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/L8CXITBN/Schwarz - 1978 - Estimating the Dimension of a Model.pdf}
}

@article{sewellLatentSpaceModels2015,
  author = {Sewell, Daniel K. and Chen, Yuguo},
  title = {Latent {{Space Models}} for {{Dynamic Networks}}},
  journal = {Journal of the American Statistical Association},
  year = 2015,
  month = oct,
  volume = {110},
  number = {512},
  pages = {1646--1657},
  publisher = {Taylor \& Francis},
  issn = {0162-1459},
  doi = {10.1080/01621459.2014.988214},
  urldate = {2024-05-20},
  keywords = {Embedding,Markov chain Monte Carlo,Network data,Social influence,Visualization},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/6LJMITGR/Sewell et Chen - 2015 - Latent Space Models for Dynamic Networks.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/TJD8ZWBA/01621459.2014.988214.pdf.pdf}
}

% AISTATS 2009 paper on graphlet-count graph kernels; the abstract's graphlet size is written as math.
@inproceedings{shervashidzeEfficientGraphletKernels2009,
  title = {Efficient Graphlet Kernels for Large Graph Comparison},
  booktitle = {Proceedings of the {{Twelfth International Conference}} on {{Artificial Intelligence}} and {{Statistics}}},
  author = {Shervashidze, Nino and Vishwanathan, S. V. N. and Petri, Tobias and Mehlhorn, Kurt and Borgwardt, Karsten},
  year = 2009,
  month = apr,
  pages = {488--495},
  publisher = {PMLR},
  issn = {1938-7228},
  urldate = {2025-01-26},
  abstract = {State-of-the-art graph kernels do not scale to large graphs with hundreds of nodes and thousands of edges. In this article we propose to compare graphs by counting graphlets, i.e., subgraphs with {$k$} nodes where {$k \in \lbrace 3, 4, 5 \rbrace$}. Exhaustive enumeration of all graphlets being prohibitively expensive, we introduce two theoretically grounded speedup schemes, one based on sampling and the second one specifically designed for bounded degree graphs. In our experimental evaluation, our novel kernels allow us to efficiently compare large graphs that cannot be tackled by existing graph kernels.},
  langid = {english},
  keywords = {a lire},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/32IYRRZ5/Shervashidze et al. - 2009 - Efficient graphlet kernels for large graph comparison.pdf}
}

@article{shervashidzeWeisfeilerLehmanGraphKernels2011,
  author = {Shervashidze, Nino and Schweitzer, Pascal and van Leeuwen, Erik Jan and Mehlhorn, Kurt and Borgwardt, Karsten M.},
  title = {Weisfeiler-{{Lehman Graph Kernels}}},
  journal = {Journal of Machine Learning Research},
  year = 2011,
  volume = {12},
  number = {77},
  pages = {2539--2561},
  issn = {1533-7928},
  urldate = {2025-01-26},
  keywords = {a lire},
  abstract = {In this article, we propose a family of efficient kernels for large graphs with discrete node labels. Key to our method is a rapid feature extraction scheme based on the Weisfeiler-Lehman test of isomorphism on graphs. It maps the original graph to a sequence of graphs, whose node attributes capture topological and label information. A family of kernels can be defined based on this Weisfeiler-Lehman sequence of graphs, including a highly efficient kernel comparing subtree-like patterns. Its runtime scales only linearly in the number of edges of the graphs and the length of the Weisfeiler-Lehman graph sequence. In our experimental evaluation, our kernels outperform state-of-the-art graph kernels on several graph classification benchmark data sets in terms of accuracy and runtime. Our kernels open the door to large-scale applications of graph kernels in various disciplines such as computational biology and social network analysis.},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/F5C332U5/Shervashidze et al. - 2011 - Weisfeiler-Lehman Graph Kernels.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/XLVHPJ32/shervashidze11a.pdf}
}

% Journal title stored in full; abbreviation is left to the bibliography style.
@article{sheykhaliRobustnessExtinctionPlasticity2020,
  title = {Robustness to Extinction and Plasticity Derived from Mutualistic Bipartite Ecological Networks},
  author = {Sheykhali, Somaye and {Fern{\'a}ndez-Gracia}, Juan and Traveset, Anna and Ziegler, Maren and Voolstra, Christian R. and Duarte, Carlos M. and Egu{\'i}luz, V{\'i}ctor M.},
  year = 2020,
  month = jun,
  journal = {Scientific Reports},
  volume = {10},
  number = {1},
  pages = {9783},
  publisher = {Nature Publishing Group},
  issn = {2045-2322},
  doi = {10.1038/s41598-020-66131-5},
  urldate = {2025-09-18},
  abstract = {Understanding the response of ecological networks to perturbations and disruptive events is needed to anticipate the biodiversity loss and extinction cascades. Here, we study how network plasticity reshapes the topology of mutualistic networks in response to species loss. We analyze more than one hundred empirical mutualistic networks and considered random and targeted removal as mechanisms of species extinction. Network plasticity is modeled as either random rewiring, as the most parsimonious approach, or resource affinity-driven rewiring, as a proxy for encoding the phylogenetic similarity and functional redundancy among species. This redundancy should be positively correlated with the robustness of an ecosystem, as functions can be taken by other species once one of them is extinct. We show that effective modularity, i.e. the ability of an ecosystem to adapt or restructure, increases with increasing numbers of extinctions, and with decreasing the replacement probability. Importantly, modularity is mostly affected by the extinction rather than by rewiring mechanisms. These changes in community structure are reflected in the robustness and stability due to their positive correlation with modularity. Resource affinity-driven rewiring offers an increase of modularity, robustness, and stability which could be an evolutionary favored mechanism to prevent a cascade of co-extinctions.},
  copyright = {2020 The Author(s)},
  langid = {english},
  keywords = {Biodiversity,Complex networks,Ecological networks},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-09-18T14:43:11.755Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/YVCBAJYI/Sheykhali et al. - 2020 - Robustness to extinction and plasticity derived from mutualistic bipartite ecological networks.pdf}
}

@article{simmonsMotifsBipartiteEcological2019,
  author = {Simmons, Benno I. and Cirtwill, Alyssa R. and Baker, Nick J. and Wauchope, Hannah S. and Dicks, Lynn V. and Stouffer, Daniel B. and Sutherland, William J.},
  title = {Motifs in Bipartite Ecological Networks: Uncovering Indirect Interactions},
  shorttitle = {Motifs in Bipartite Ecological Networks},
  journal = {Oikos},
  year = 2019,
  month = jan,
  volume = {128},
  number = {2},
  pages = {154--170},
  issn = {0030-1299, 1600-0706},
  doi = {10.1111/oik.05670},
  urldate = {2025-04-10},
  langid = {english},
  abstract = {Indirect interactions play an essential role in governing population, community and coevolutionary dynamics across a diverse range of ecological communities. Such communities are widely represented as bipartite networks: graphs depicting interactions between two groups of species, such as plants and pollinators or hosts and parasites. For over thirty years, studies have used indices, such as connectance and species degree, to characterise the structure of these networks and the roles of their constituent species. However, compressing a complex network into a single metric necessarily discards large amounts of information about indirect interactions. Given the large literature demonstrating the importance and ubiquity of indirect effects, many studies of network structure are likely missing a substantial piece of the ecological puzzle. Here we use the emerging concept of bipartite motifs to outline a new framework for bipartite networks that incorporates indirect interactions. While this framework is a significant departure from the current way of thinking about bipartite ecological networks, we show that this shift is supported by analyses of simulated and empirical data. We use simulations to show how consideration of indirect interactions can highlight differences missed by the current index paradigm that may be ecologically important. We extend this finding to empirical plant--pollinator communities, showing how two bee species, with similar direct interactions, differ in how specialised their competitors are. These examples underscore the need to not rely solely on network- and species-level indices for characterising the structure of bipartite ecological networks.},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/3BFRNIK2/Simmons et al. - 2019 - Motifs in bipartite ecological networks uncovering indirect interactions.pdf}
}

% Journal title stored in full; NOTE(review): the abstract's opening words were restored from memory of the published abstract -- confirm against the publisher page.
@article{snijdersEstimationPredictionStochastic1997,
  title = {Estimation and {{Prediction}} for {{Stochastic Blockmodels}} for {{Graphs}} with {{Latent Block Structure}}},
  author = {Snijders, Tom A. B. and Nowicki, Krzysztof},
  year = 1997,
  month = jan,
  journal = {Journal of Classification},
  volume = {14},
  number = {1},
  pages = {75--100},
  issn = {1432-1343},
  doi = {10.1007/s003579900004},
  urldate = {2023-06-15},
  abstract = {A statistical approach to a posteriori blockmodeling for graphs is proposed. The model assumes that the vertices of the graph are partitioned into two unknown blocks and that the probability of an edge between two vertices depends only on the blocks to which they belong. Statistical procedures are derived for estimating the probabilities of edges and for predicting the block structure from observations of the edge pattern only. ML estimators can be computed using the EM algorithm, but this strategy is practical only for small graphs. A Bayesian estimator, based on the Gibbs sampling, is proposed. This estimator is practical also for large graphs. When ML estimators are used, the block structure can be predicted based on predictive likelihood. When Gibbs sampling is used, the block structure can be predicted from posterior predictive probabilities. A side result is that when the number of vertices tends to infinity while the probabilities remain constant, the block structure can be recovered correctly with probability tending to 1.},
  langid = {english},
  keywords = {Bayesian Estimator,Block Structure,Gibbs Sampling,Large Graph,Statistical Procedure},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/2GYRASW5/snijders1997.pdf.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/JJNQV32Y/Snijders et Nowicki - 1997 - Estimation and Prediction for Stochastic Blockmode.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/LXGG9SRP/snijders1997.pdf.pdf}
}

@article{souzaTemporalVariationPlant2018,
  author = {Souza, Camila S. and Maruyama, Pietro K. and Aoki, Camila and Sigrist, Maria R. and Raizer, Josu{\'e} and Gross, Caroline L. and {de Araujo}, Andr{\'e}a C.},
  title = {Temporal Variation in Plant--Pollinator Networks from Seasonal Tropical Environments: {{Higher}} Specialization When Resources Are Scarce},
  shorttitle = {Temporal Variation in Plant--Pollinator Networks from Seasonal Tropical Environments},
  journal = {Journal of Ecology},
  year = 2018,
  volume = {106},
  number = {6},
  pages = {2409--2420},
  issn = {1365-2745},
  doi = {10.1111/1365-2745.12978},
  urldate = {2025-03-24},
  copyright = {\copyright{} 2018 The Authors. Journal of Ecology \copyright{} 2018 British Ecological Society},
  langid = {english},
  keywords = {Cerrado,functional diversity,modularity,nestedness,network sampling,Pantanal,resource availability,seasonality},
  abstract = {The temporal dynamics of plant phenology and pollinator abundance across seasons should influence the structure of plant--pollinator interaction networks. Nevertheless, such dynamics are seldom considered, especially for diverse tropical networks. Here, we evaluated the temporal variation of four plant--pollinator networks in two seasonal ecosystems in Central Brazil (Cerrado and Pantanal). Data were gathered on a monthly basis over 1 year for each network. We characterized seasonal and temporal shifts in plant--pollinator interactions, using temporally discrete networks. We predicted that the greater floral availability in the rainy season would allow for finer partitioning of the floral niche by the pollinators, i.e. higher specialization patterns as previously described across large spatial gradients. Finally, we also evaluated how sampling restricted to peak flowering period may affect the characterization of the networks. Contrary to our expectations, we found that dry season networks, although characterized by lower floral resource richness and abundance, showed higher levels of network-wide interaction partitioning (complementary specialization and modularity). For nestedness, though, this between-seasons difference was not consistent. Reduced resource availability in the dry season may promote higher interspecific competition among pollinators leading to reduced niche overlap, thus explaining the increase in specialization. There were no consistent differences between seasons in species-level indices, indicating that higher network level specialization is an emergent property only seen when considering the entire network. However, bees presented higher values of specialization and species strength in relation to other groups such as flies and wasps, suggesting that some plant species frequently associated with bees are used only by this group. 
Our study also indicates that targeted data collection during peak flowering generates higher estimates of network specialization, possibly because species activity spans longer periods than the targeted time frame. Hence, depending on the period of data collection, different structural values for the networks of interactions may be found. Synthesis. Plant--pollinator networks from tropical environments have structural properties that vary according to seasons, which should be taken into account in the description of the complex systems of interactions between plants and their pollinators in these areas.},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/ABKWLF45/Souza et al. - 2018 - Temporal variation in plant–pollinator networks from seasonal tropical environments Higher speciali.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/KGWHZ5H2/1365-2745.html}
}

@incollection{SubjectIndex1990,
  title = {Subject {{Index}}},
  booktitle = {Finding {{Groups}} in {{Data}}},
  publisher = {John Wiley \& Sons, Ltd},
  year = 1990,
  pages = {335--342},
  isbn = {978-0-470-31680-1},
  doi = {10.1002/9780470316801.indsub},
  urldate = {2024-09-13},
  langid = {english},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/FWYM943T/1990 - Subject Index.pdf}
}

@misc{Sujetthese,
  file = {/home/polarolouis/Nextcloud/Documents/Thèse/Administratif/candidature-these/Sujet.pdf},
  title = {Sujet-These}
}

@misc{thebaultDatabasePlantpollinatorNetworks2020,
  author = {Th{\'e}bault, Elisa and Fontaine, Colin},
  title = {A Database of Plant-Pollinator Networks},
  publisher = {Zenodo},
  year = 2020,
  month = dec,
  doi = {10.5281/zenodo.4300427},
  urldate = {2023-06-21},
  keywords = {data,diversity,flower visitors,mutualistic network,plant-pollinator,plant-pollinator interaction},
  abstract = {This database assembles different published datasets of observed interaction networks between plants and pollinators, which were extracted from articles, theses and existing online databases. Each row in the data table corresponds to an interaction between a plant and a pollinator species reported at a given site by a given publication.}
}

% Zenodo dataset record; DOI written in lower case like the 2020 Zenodo entry in this file.
@misc{thebaultelisaDatabasePlantpollinatorNetworks2022,
  title = {A Database of Plant-Pollinator Networks},
  author = {Th{\'e}bault, Elisa and Fontaine, Colin},
  year = 2022,
  month = jun,
  publisher = {Zenodo},
  doi = {10.5281/zenodo.4300426},
  urldate = {2023-06-21},
  abstract = {This database assembles different published datasets of observed interaction networks between plants and pollinators, which were extracted from articles, theses and existing online databases. Each row in the data table corresponds to an interaction between a plant and a pollinator species reported at a given site by a given publication.},
  collaborator = {Dor{\'e}, Ma{\"e}l and Parra, Santiago},
  copyright = {Creative Commons Attribution 4.0 International, Open Access},
  keywords = {data,diversity,flower visitors,mutualistic network,plant-pollinator,plant-pollinator interaction}
}

@inproceedings{togninalliWassersteinWeisfeilerLehmanGraph2019,
  author = {Togninalli, Matteo and Ghisu, Elisabetta and {Llinares-L{\'o}pez}, Felipe and Rieck, Bastian and Borgwardt, Karsten},
  title = {Wasserstein {{Weisfeiler-Lehman Graph Kernels}}},
  booktitle = {Advances in {{Neural Information Processing Systems}}},
  volume = {32},
  publisher = {Curran Associates, Inc.},
  year = 2019,
  urldate = {2025-01-26},
  keywords = {a lire},
  abstract = {Most graph kernels are an instance of the class of R-Convolution kernels, which measure the similarity of objects by comparing their substructures. Despite their empirical success, most graph kernels use a naive aggregation of the final set of substructures, usually a sum or average, thereby potentially discarding valuable information about the distribution of individual components. Furthermore, only a limited instance of these approaches can be extended to continuously attributed graphs.  We propose a novel method that relies on the Wasserstein distance between the node feature vector distributions of two graphs, which allows to find subtler differences in data sets by considering graphs as high-dimensional objects, rather than simple means. We further propose a Weisfeiler--Lehman inspired embedding scheme for graphs with continuous node attributes and weighted edges, enhance it with the computed Wasserstein distance, and thus improve the state-of-the-art prediction performance on several graph classification tasks.},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/Y7NPRCAD/Togninalli et al. - 2019 - Wasserstein Weisfeiler-Lehman Graph Kernels.pdf}
}

% Course practical handout with no journal, author or year recorded, so typed as misc rather than article.
@misc{TP1Sciences,
  title = {{TP 1 de Sciences des donn\'ees : apprentissage statistique}},
  langid = {french},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/5U5PQR63/Éléments de correction du TP 1.pdf}
}

% Course practical handout with no journal, author or year recorded, so typed as misc rather than article.
@misc{TP2Sciences,
  title = {{TP 2 de Sciences des donn\'ees : apprentissage statistique (\'el\'ements de correction)}},
  langid = {french},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/4VMMDXNG/TP 2 de Sciences des données  apprentissage statistique (éléments de correction).pdf}
}

% Course practical handout with no journal, author or year recorded, so typed as misc rather than article.
@misc{TP3Sciences,
  title = {{TP 3 de Sciences des donn\'ees : apprentissage statistique}},
  langid = {french},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/7UH85NBN/TP 3 de Sciences des données  apprentissage statistique.pdf}
}

% Abstract symbols for links per plant / per pollinator species are written as math subscripts.
@article{trojelsgaardMacroecologyPollinationNetworks2013,
  title = {Macroecology of Pollination Networks},
  author = {Tr{\o}jelsgaard, Kristian and Olesen, Jens M.},
  year = 2013,
  journal = {Global Ecology and Biogeography},
  volume = {22},
  number = {2},
  pages = {149--162},
  issn = {1466-8238},
  doi = {10.1111/j.1466-8238.2012.00777.x},
  urldate = {2025-03-24},
  abstract = {Aim Interacting communities of species are organized into complex networks, and network analysis is reckoned to be a strong tool for describing their architecture. Many species assemblies show strong macroecological patterns, e.g. increasing species richness with decreasing latitude, but whether this latitudinal diversity gradient scales up to entities as complex as networks is unknown. We investigated this using a dataset of 54 community-wide pollination networks and hypothesized that pollination networks would display a latitudinal and altitudinal species richness gradient, increasing specialization towards the tropics, and that network topology would be affected by current climate. Location Global. Methods Each network was organized as a presence/absence matrix, consisting of P plant species, A pollinator species and their links. From these matrices, network parameters were estimated. Additionally, data about geography (latitude, elevation), climate at the network site (temperature, precipitation) and sampling effort (observation days) and extent (study-plot size) were gathered. Analyses were done using simultaneous autoregressive modelling (SAR). Results Species richness did not vary strongly with either latitude or elevation. However, network modularity decreased significantly with latitude whereas mean number of links per plant species ({$L_p$}) and A/P ratio peaked at mid-latitude. Above 500 m a.s.l., A/P ratio decreased and mean number of links per pollinator species ({$L_a$}) increased with elevation. {$L_p$} displayed mid-ambient peaks with temperature and nestedness and modularity displayed linear relationships with precipitation. Main conclusion Pollination networks showed macroecological patterns. No strong latitudinal or altitudinal gradient in species richness was observed. {$L_p$} and the A/P ratio peaked at mid-latitude whereas modularity decreased linearly. Both patterns are suggestive of a more specialized interaction structure towards the tropics. 
In particular, mean annual precipitation appeared influential on network topology as both nestedness and modularity varied significantly. Importantly, corrected regressions suggest that neither sampling effort nor extent affected the observed patterns.},
  copyright = {\copyright{} 2012 Blackwell Publishing Ltd},
  langid = {english},
  keywords = {Climate change,ecological networks,geographical gradients,macroecology,pollination,sampling effort,species interactions},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/P6MW22VI/Trøjelsgaard et Olesen - 2013 - Macroecology of pollination networks.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/JKFY35H2/j.1466-8238.2012.00777.html}
}

% ISSN written in the standard hyphenated NNNN-NNNN form.
@article{turnerTutorialApproximateBayesian2012a,
  title = {A Tutorial on Approximate {{Bayesian}} Computation},
  author = {Turner, Brandon M. and Van Zandt, Trisha},
  year = 2012,
  month = apr,
  journal = {Journal of Mathematical Psychology},
  volume = {56},
  number = {2},
  pages = {69--85},
  issn = {0022-2496},
  doi = {10.1016/j.jmp.2012.02.005},
  urldate = {2026-05-05},
  abstract = {This tutorial explains the foundation of approximate Bayesian computation (ABC), an approach to Bayesian inference that does not require the specification of a likelihood function, and hence that can be used to estimate posterior distributions of parameters for simulation-based models. We discuss briefly the philosophy of Bayesian inference and then present several algorithms for ABC. We then apply these algorithms in a number of examples. For most of these examples, the posterior distributions are known, and so we can compare the estimated posteriors derived from ABC to the true posteriors and verify that the algorithms recover the true posteriors accurately. We also consider a popular simulation-based model of recognition memory (REM) for which the true posteriors are unknown. We conclude with a number of recommendations for applying ABC methods to solve real-world problems.},
  copyright = {https://www.elsevier.com/tdm/userlicense/1.0/},
  langid = {english},
  keywords = {abc,Approximate Bayesian Computation,Bayesian inference,tutorial},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2026-05-05T08:49:17.248Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/N4E59NBM/Turner et Van Zandt - 2012 - A tutorial on approximate Bayesian computation.pdf}
}

% arXiv preprint; DOI follows the 10.48550/arXiv.<id> pattern used by the other arXiv entries in this file.
% NOTE(review): primary class set to stat.ML from memory of the arXiv listing -- confirm.
@misc{velickovicGraphAttentionNetworks2018,
  title = {Graph {{Attention Networks}}},
  author = {Veli{\v c}kovi{\'c}, Petar and Cucurull, Guillem and Casanova, Arantxa and Romero, Adriana and Li{\`o}, Pietro and Bengio, Yoshua},
  year = 2018,
  month = feb,
  number = {arXiv:1710.10903},
  eprint = {1710.10903},
  primaryclass = {stat.ML},
  publisher = {arXiv},
  doi = {10.48550/arXiv.1710.10903},
  urldate = {2024-05-14},
  abstract = {We present graph attention networks (GATs), novel neural network architectures that operate on graph-structured data, leveraging masked self-attentional layers to address the shortcomings of prior methods based on graph convolutions or their approximations. By stacking layers in which nodes are able to attend over their neighborhoods' features, we enable (implicitly) specifying different weights to different nodes in a neighborhood, without requiring any kind of costly matrix operation (such as inversion) or depending on knowing the graph structure upfront. In this way, we address several key challenges of spectral-based graph neural networks simultaneously, and make our model readily applicable to inductive as well as transductive problems. Our GAT models have achieved or matched state-of-the-art results across four established transductive and inductive graph benchmarks: the Cora, Citeseer and Pubmed citation network datasets, as well as a protein-protein interaction dataset (wherein test graphs remain unseen during training).},
  archiveprefix = {arXiv},
  langid = {english},
  keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning,Computer Science - Social and Information Networks,Statistics - Machine Learning},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/8LZB7KTU/Veličković et al. - 2018 - Graph Attention Networks.pdf}
}

% JMLR article; journal title given in full, as in the other JMLR entries of this file.
% Complexity bounds in the abstract are written as math so the exponents are explicit.
@article{vishwanathanGraphKernels2010,
  title = {Graph {{Kernels}}},
  author = {Vishwanathan, S. V. N. and Schraudolph, Nicol N. and Kondor, Risi and Borgwardt, Karsten M.},
  year = 2010,
  month = aug,
  journal = {Journal of Machine Learning Research},
  volume = {11},
  pages = {1201--1242},
  issn = {1532-4435},
  abstract = {We present a unified framework to study graph kernels, special cases of which include the random walk (G{\"a}rtner et al., 2003; Borgwardt et al., 2005) and marginalized (Kashima et al., 2003, 2004; Mah{\'e} et al., 2004) graph kernels. Through reduction to a Sylvester equation we improve the time complexity of kernel computation between unlabeled graphs with n vertices from {$O(n^6)$} to {$O(n^3)$}. We find a spectral decomposition approach even more efficient when computing entire kernel matrices. For labeled graphs we develop conjugate gradient and fixed-point methods that take {$O(dn^3)$} time per iteration, where d is the size of the label set. By extending the necessary linear algebra to Reproducing Kernel Hilbert Spaces (RKHS) we obtain the same result for d-dimensional edge kernels, and {$O(n^4)$} in the infinite-dimensional case; on sparse graphs these algorithms only take {$O(n^2)$} time per iteration in all cases. Experiments on graphs from bioinformatics and other application domains show that these techniques can speed up computation of the kernel by an order of magnitude or more. We also show that certain rational kernels (Cortes et al., 2002, 2003, 2004) when specialized to graphs reduce to our random walk graph kernel. Finally, we relate our framework to R-convolution kernels (Haussler, 1999) and provide a kernel that is close to the optimal assignment kernel of Fr{\"o}hlich et al. (2006) yet provably positive semi-definite.},
  keywords = {a lire},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/9MPIL9VL/vishwanathan10a.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/CZD5F9CD/Vishwanathan et al. - 2010 - Graph Kernels.pdf}
}

% arXiv preprint; publisher recorded as arXiv like the other arXiv entries in this file.
@misc{wangAmortizedProbabilisticDetection2024,
  title = {Amortized {{Probabilistic Detection}} of {{Communities}} in {{Graphs}}},
  author = {Wang, Yueqi and Lee, Yoonho and Basu, Pallab and Lee, Juho and Teh, Yee Whye and Paninski, Liam and Pakman, Ari},
  year = 2024,
  month = aug,
  number = {arXiv:2010.15727},
  eprint = {2010.15727},
  primaryclass = {stat},
  publisher = {arXiv},
  doi = {10.48550/arXiv.2010.15727},
  urldate = {2026-04-16},
  abstract = {Learning community structures in graphs has broad applications across scientific domains. While graph neural networks (GNNs) have been successful in encoding graph structures, existing GNN-based methods for community detection are limited by requiring knowledge of the number of communities in advance, in addition to lacking a proper probabilistic formulation to handle uncertainty. We propose a simple framework for amortized community detection, which addresses both of these issues by combining the expressive power of GNNs with recent methods for amortized clustering. Our models consist of a graph representation backbone that extracts structural information and an amortized clustering network that naturally handles variable numbers of clusters. Both components combine into well-defined models of the posterior distribution of graph communities and are jointly optimized given labeled graphs. At inference time, the models yield parallel samples from the posterior of community labels, quantifying uncertainty in a principled way. We evaluate several models from our framework on synthetic and real datasets, and demonstrate improved performance compared to previous methods. As a separate contribution, we extend recent amortized probabilistic clustering architectures by adding attention modules, which yield further improvements on community detection tasks.},
  archiveprefix = {arXiv},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2026-04-16T12:33:33.999Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/U4MW8BCL/Wang et al. - 2024 - Amortized Probabilistic Detection of Communities in Graphs.pdf}
}

% arXiv preprint; eprint class is the full category matching the keywords (Statistics Theory, math.ST).
@misc{wangNeuralEntropicGromovWasserstein2023,
  title = {Neural {{Entropic Gromov-Wasserstein Alignment}}},
  author = {Wang, Tao and Goldfeld, Ziv},
  year = 2023,
  month = dec,
  number = {arXiv:2312.07397},
  eprint = {2312.07397},
  primaryclass = {math.ST},
  publisher = {arXiv},
  doi = {10.48550/arXiv.2312.07397},
  urldate = {2025-06-11},
  abstract = {The Gromov-Wasserstein (GW) distance, rooted in optimal transport (OT) theory, provides a natural framework for aligning heterogeneous datasets. Alas, statistical estimation of the GW distance suffers from the curse of dimensionality and its exact computation is NP hard. To circumvent these issues, entropic regularization has emerged as a remedy that enables parametric estimation rates via plug-in and efficient computation using Sinkhorn iterations. Motivated by further scaling up entropic GW (EGW) alignment methods to data dimensions and sample sizes that appear in modern machine learning applications, we propose a novel neural estimation approach. Our estimator parametrizes a minimax semi-dual representation of the EGW distance by a neural network, approximates expectations by sample means, and optimizes the resulting empirical objective over parameter space. We establish non-asymptotic error bounds on the EGW neural estimator of the alignment cost and optimal plan. Our bounds characterize the effective error in terms of neural network (NN) size and the number of samples, revealing optimal scaling laws that guarantee parametric convergence. The bounds hold for compactly supported distributions, and imply that the proposed estimator is minimax-rate optimal over that class. Numerical experiments validating our theory are also provided.},
  archiveprefix = {arXiv},
  keywords = {Mathematics - Statistics Theory,Statistics - Statistics Theory},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-06-11T15:49:30.151Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/5WPFGMCN/Wang et Goldfeld - 2023 - Neural Entropic Gromov-Wasserstein Alignment.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/D3QJ7SQR/2312.html}
}

% Website reference; the address lives in the url field so that urldate applies to it.
@misc{WebLifeEcological2022,
  title = {Web of {{Life}}: Ecological Networks Database},
  year = 2022,
  month = jul,
  url = {https://www.web-of-life.es/map.php},
  urldate = {2023-06-17},
  keywords = {networks,site},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/9WZE8QLQ/map.html;/home/louis/snap/zotero-snap/common/Zotero/storage/RLHUSJZY/map.html}
}

% arXiv preprint. primaryclass holds a full arXiv class rather than the bare
% archive name: cs.LG is the class named in keywords (Computer Science - Machine Learning).
@misc{williamsSimplextoEuclideanBijectionConjugate2026,
  title = {Simplex-to-{{Euclidean Bijection}} for {{Conjugate}} and {{Calibrated Multiclass Gaussian Process}}},
  author = {Williams, Bernardo and Tetali, Harsha Vardhan and Klami, Arto and Hartmann, Marcelo},
  year = 2026,
  month = mar,
  number = {arXiv:2603.16621},
  eprint = {2603.16621},
  primaryclass = {cs.LG},
  publisher = {arXiv},
  doi = {10.48550/arXiv.2603.16621},
  urldate = {2026-04-12},
  abstract = {We propose a conjugate and calibrated Gaussian process (GP) model for multi-class classification by exploiting the geometry of the probability simplex. Our approach uses Aitchison geometry to map simplex-valued class probabilities to an unconstrained Euclidean representation, turning classification into a GP regression problem with fewer latent dimensions than standard multi-class GP classifiers. This yields conjugate inference and reliable predictive probabilities without relying on distributional approximations in the model construction. The method is compatible with standard sparse GP regression techniques, enabling scalable inference on larger datasets. Empirical results show well-calibrated and competitive performance across synthetic and real-world datasets.},
  archiveprefix = {arXiv},
  langid = {english},
  keywords = {Computer Science - Machine Learning},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2026-04-12T17:33:08.131Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/EHNNU3GX/Williams et al. - 2026 - Simplex-to-Euclidean Bijection for Conjugate and Calibrated Multiclass Gaussian Process.pdf}
}

% Wills and Meyer (2020), PLOS ONE: comparison of graph distance measures;
% the abstract also introduces the NetComp Python library.
@article{willsMetricsGraphComparison2020,
  author     = {Wills, Peter and Meyer, Fran{\c c}ois G.},
  title      = {Metrics for Graph Comparison: {{A}} Practitioner's Guide},
  shorttitle = {Metrics for Graph Comparison},
  journal    = {PLOS ONE},
  year       = 2020,
  month      = feb,
  volume     = {15},
  number     = {2},
  pages      = {e0228728},
  publisher  = {Public Library of Science},
  issn       = {1932-6203},
  doi        = {10.1371/journal.pone.0228728},
  urldate    = {2025-05-20},
  langid     = {english},
  keywords   = {Community structure,Connectomics,Distance measurement,Eigenvalues,Mathematical models,Neural networks,Random graphs,Schools},
  abstract   = {Comparison of graph structure is a ubiquitous task in data analysis and machine learning, with diverse applications in fields such as neuroscience, cyber security, social network analysis, and bioinformatics, among others. Discovery and comparison of structures such as modular communities, rich clubs, hubs, and trees yield insight into the generative mechanisms and functional properties of the graph. Often, two graphs are compared via a pairwise distance measure, with a small distance indicating structural similarity and vice versa. Common choices include spectral distances and distances based on node affinities. However, there has of yet been no comparative study of the efficacy of these distance measures in discerning between common graph topologies at different structural scales. In this work, we compare commonly used graph metrics and distance measures, and demonstrate their ability to discern between common topological features found in both random graph models and real world networks. We put forward a multi-scale picture of graph structure wherein we study the effect of global and local structures on changes in distance measures. We make recommendations on the applicability of different distance measures to the analysis of empirical graph data based on this multi-scale view. Finally, we introduce the Python library NetComp that implements the graph distances used in this work.},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-05-20T12:22:29.607Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/YHGXNCVV/Wills et Meyer - 2020 - Metrics for graph comparison A practitioner’s guide.pdf}
}

% Wilms and Bien (2022), JMLR: the tree-aggregated graphical lasso (tag-lasso)
% for graphical models that are both edge-sparse and node-aggregated.
@article{wilmsTreebasedNodeAggregation2022,
  author     = {Wilms, Ines and Bien, Jacob},
  title      = {Tree-Based {{Node Aggregation}} in {{Sparse Graphical Models}}},
  journal    = {Journal of Machine Learning Research},
  year       = 2022,
  volume     = {23},
  number     = {243},
  pages      = {1--36},
  issn       = {1533-7928},
  urldate    = {2025-10-14},
  abstract   = {High-dimensional graphical models are often estimated using regularization that is aimed at reducing the number of edges in a network. In this work, we show how even simpler networks can be produced by aggregating the nodes of the graphical model. We develop a new convex regularized method, called the tree-aggregated graphical lasso or tag-lasso, that estimates graphical models that are both edge-sparse and node-aggregated. The aggregation is performed in a data-driven fashion by leveraging side information in the form of a tree that encodes node similarity and facilitates the interpretation of the resulting aggregated nodes. We provide an efficient implementation of the tag-lasso by using the locally adaptive alternating direction method of multipliers and illustrate our proposal's practical advantages in simulation and in applications in finance and biology.},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-10-14T12:07:34.769Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/V5KN62CY/Wilms et Bien - 2022 - Tree-based Node Aggregation in Sparse Graphical Models.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/4A8U5CT3/21-0105.html}
}

% Wittes (1972), Biometrics 28(2). Title stripped of the "331. Note:" section
% label, the shorttitle derived from that label dropped, and the catalogue
% square brackets removed from publisher.
% NOTE(review): confirm the section label is not wanted in the printed title.
@article{wittes331NoteBias1972,
  title = {On the {{Bias}} and {{Estimated Variance}} of {{Chapman}}'s {{Two-Sample Capture-Recapture Population Estimate}}},
  author = {Wittes, Janet T.},
  year = 1972,
  journal = {Biometrics},
  volume = {28},
  number = {2},
  eprint = {2556173},
  eprinttype = {jstor},
  pages = {592--597},
  publisher = {Wiley, International Biometric Society},
  issn = {0006-341X},
  doi = {10.2307/2556173},
  urldate = {2025-01-16},
  abstract = {This note demonstrates that Chapman's estimate NU for population size in a two-sample capture-recapture experiment is unbiased when the sum of the sample sizes is at least as great as the population size. Further, an estimate of the variance of NU is proposed and is shown to be unbiased when n1 + n2 {$\geq$} N.},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/FDTJ86EU/Wittes - 1972 - 331. Note On the Bias and Estimated Variance of Chapman's Two-Sample Capture-Recapture Population E.pdf}
}

% arXiv preprint 1810.00826. primaryclass holds one arXiv class instead of the
% archive list "cs, stat"; cs.LG matches the Machine Learning subject in keywords.
% NOTE(review): confirm cs.LG against the arXiv listing.
@misc{xuHowPowerfulAre2019,
  title = {How {{Powerful}} Are {{Graph Neural Networks}}?},
  author = {Xu, Keyulu and Hu, Weihua and Leskovec, Jure and Jegelka, Stefanie},
  year = 2019,
  month = feb,
  number = {arXiv:1810.00826},
  eprint = {1810.00826},
  primaryclass = {cs.LG},
  publisher = {arXiv},
  doi = {10.48550/arXiv.1810.00826},
  urldate = {2024-05-14},
  abstract = {Graph Neural Networks (GNNs) are an effective framework for representation learning of graphs. GNNs follow a neighborhood aggregation scheme, where the representation vector of a node is computed by recursively aggregating and transforming representation vectors of its neighboring nodes. Many GNN variants have been proposed and have achieved state-of-the-art results on both node and graph classification tasks. However, despite GNNs revolutionizing graph representation learning, there is limited understanding of their representational properties and limitations. Here, we present a theoretical framework for analyzing the expressive power of GNNs to capture different graph structures. Our results characterize the discriminative power of popular GNN variants, such as Graph Convolutional Networks and GraphSAGE, and show that they cannot learn to distinguish certain simple graph structures. We then develop a simple architecture that is provably the most expressive among the class of GNNs and is as powerful as the Weisfeiler-Lehman graph isomorphism test. We empirically validate our theoretical findings on a number of graph classification benchmarks, and demonstrate that our model achieves state-of-the-art performance.},
  archiveprefix = {arXiv},
  keywords = {Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning,Statistics - Machine Learning},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/THBD5QV3/Xu et al. - 2019 - How Powerful are Graph Neural Networks.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/ZJF5UWIH/1810.html}
}

% Xu (2021), review of graph embedding methods. The journal is stored under its
% full name so that abbreviation is left to the bibliography style.
@article{xuUnderstandingGraphEmbedding2021,
  title = {Understanding {{Graph Embedding Methods}} and {{Their Applications}}},
  author = {Xu, Mengjia},
  year = 2021,
  month = jan,
  journal = {SIAM Review},
  volume = {63},
  number = {4},
  pages = {825--853},
  issn = {0036-1445, 1095-7200},
  doi = {10.1137/20M1386062},
  urldate = {2025-09-19},
  abstract = {Graph analytics can lead to better quantitative understanding and control of complex networks, but traditional methods suffer from the high computational cost and excessive memory requirements associated with the high-dimensionality and heterogeneous characteristics of industrial size networks. Graph embedding techniques can be effective in converting high-dimensional sparse graphs into low-dimensional, dense, and continuous vector spaces, preserving maximally the graph structure properties. Another type of emerging graph embedding employs Gaussian distribution--based graph embedding with important uncertainty estimation. The main goal of graph embedding methods is to pack every node's properties into a vector with a smaller dimension; hence, node similarity in the original complex irregular spaces can be easily quantified in the embedded vector spaces using standard metrics. The nonlinear and highly informative graph embeddings generated in the latent space can be conveniently used to address different downstream graph analytics tasks (e.g., node classification, link prediction, community detection, visualization, etc.). In this review, we present some fundamental concepts in graph analytics and graph embedding methods, focusing in particular on random walk--based and neural network--based methods. We also discuss the emerging deep learning--based dynamic graph embedding methods. We highlight the distinct advantages of graph embedding methods in four diverse applications, and we present implementation details and references to open-source software as well as available databases in the supplementary material to help interested readers start their exploration into graph analytics.},
  langid = {english},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-09-19T12:36:55.370Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/WLYYXUGK/Xu - 2021 - Understanding Graph Embedding Methods and Their Applications.pdf}
}

% Yang, Kong and Mao (2024), Neurocomputing 576. ISSN written in the standard
% hyphenated NNNN-NNNN form.
% NOTE(review): primaryclass lists two archives; replace with the single arXiv
% primary class once confirmed against the arXiv listing for 2106.11721.
@article{yangDeepLatentSpace2024,
  title = {A {{Deep Latent Space Model}} for {{Graph Representation Learning}}},
  author = {Yang, Hanxuan and Kong, Qingchao and Mao, Wenji},
  year = 2024,
  month = apr,
  journal = {Neurocomputing},
  volume = {576},
  eprint = {2106.11721},
  primaryclass = {cs, stat},
  pages = {127342},
  issn = {0925-2312},
  doi = {10.1016/j.neucom.2024.127342},
  urldate = {2024-05-20},
  abstract = {Graph representation learning is a fundamental problem for modeling relational data and benefits a number of downstream applications. Traditional Bayesian-based graph models and recent deep learning based GNN either suffer from impracticability or lack interpretability, thus combined models for undirected graphs have been proposed to overcome the weaknesses. As a large portion of real-world graphs are directed graphs (of which undirected graphs are special cases), in this paper, we propose a Deep Latent Space Model (DLSM) for directed graphs to incorporate the traditional latent variable based generative model into deep learning frameworks. Our proposed model consists of a graph convolutional network (GCN) encoder and a stochastic decoder, which are layer-wise connected by a hierarchical variational auto-encoder architecture. By specifically modeling the degree heterogeneity using node random factors, our model possesses better interpretability in both community structure and degree heterogeneity. For fast inference, the stochastic gradient variational Bayes (SGVB) is adopted using a non-iterative recognition model, which is much more scalable than traditional MCMC-based methods. The experiments on real-world datasets show that the proposed model achieves the state-of-the-art performances on both link prediction and community detection tasks while learning interpretable node embeddings. The source code is available at https://github.com/upperr/DLSM.},
  archiveprefix = {arXiv},
  langid = {english},
  keywords = {Computer Science - Machine Learning,Statistics - Machine Learning},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/XNVMI2D7/Yang et al. - 2024 - A Deep Latent Space Model for Graph Representation.pdf}
}

% Web-hosted copy of a document on yumpu.com. The title is trimmed of the
% truncated page-title suffix (" - Biodiversity ..."), and the corporate author
% is brace-protected so it is treated as one indivisible name.
% NOTE(review): author and journal name the hosting site, and no year is given;
% confirm the document's real author, venue and date and replace them.
@misc{yumpu.comInsectPollinatorsMer,
  title = {Insect Pollinators of the {{Mer Bleue}} Peat Bog of {{Ottawa}}},
  author = {{Yumpu.com}},
  journal = {yumpu.com},
  urldate = {2023-08-06},
  abstract = {Insect pollinators of the Mer Bleue peat bog of Ottawa - Biodiversity ...},
  howpublished = {https://www.yumpu.com/en/document/view/11762821/insect-pollinators-of-the-mer-bleue-peat-bog-of-ottawa-biodiversity-},
  langid = {english},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/DIXT2PYL/insect-pollinators-of-the-mer-bleue-peat-bog-of-ottawa-biodiversity-.html}
}

% Yurdem et al. (2024), Heliyon 10(19). ISSN written in the standard
% hyphenated NNNN-NNNN form.
@article{yurdemFederatedLearningOverview2024,
  title = {Federated Learning: {{Overview}}, Strategies, Applications, Tools and Future Directions},
  shorttitle = {Federated Learning},
  author = {Yurdem, Betul and Kuzlu, Murat and Gullu, Mehmet Kemal and Catak, Ferhat Ozgur and Tabassum, Maliha},
  year = 2024,
  month = oct,
  journal = {Heliyon},
  volume = {10},
  number = {19},
  pages = {e38137},
  issn = {2405-8440},
  doi = {10.1016/j.heliyon.2024.e38137},
  urldate = {2026-04-01},
  langid = {english},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2026-04-01T09:10:19.764Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/CD8LFI6P/Yurdem et al. - 2024 - Federated learning Overview, strategies, applications, tools and future directions.pdf}
}

% Zhou, Lu and Zhang (2009), link prediction from local similarity measures.
% The journal is stored under its full name so that abbreviation is left to
% the bibliography style.
@article{zhouPredictingMissingLinks2009,
  title = {Predicting Missing Links via Local Information},
  author = {Zhou, Tao and L{\"u}, Linyuan and Zhang, Yi-Cheng},
  year = 2009,
  month = oct,
  journal = {The European Physical Journal B},
  volume = {71},
  number = {4},
  pages = {623--630},
  issn = {1434-6028, 1434-6036},
  doi = {10.1140/epjb/e2009-00335-8},
  urldate = {2025-04-11},
  abstract = {Missing link prediction in networks is of both theoretical interest and practical significance in modern science. In this paper, we empirically investigate a simple framework of link prediction on the basis of node similarity. We compare nine well-known local similarity measures on six real networks. The results indicate that the simplest measure, namely Common Neighbours, has the best overall performance, and the Adamic-Adar index performs second best. A new similarity measure, motivated by the resource allocation process taking place on networks, is proposed and shown to have higher prediction accuracy than common neighbours. It is found that many links are assigned the same scores if only the information of the nearest neighbours is used. We therefore design another new measure exploiting information on the next nearest neighbours, which can remarkably enhance the prediction accuracy.},
  copyright = {http://www.springer.com/tdm},
  langid = {english},
  keywords = {05.65.+b Self-organized systems,89.75.-k Complex systems},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/XMRKAJWG/Zhou et al. - 2009 - Predicting missing links via local information.pdf}
}

% arXiv preprint 2207.01813. primaryclass holds a full arXiv class rather than
% the bare archive name: q-bio.QM is the only q-bio subject named in keywords
% (Quantitative Biology - Quantitative Methods).
@misc{zhouStochasticVariationalMethods2022,
  title = {Stochastic {{Variational Methods}} in {{Generalized Hidden Semi-Markov Models}} to {{Characterize Functionality}} in {{Random Heteropolymers}}},
  author = {Zhou, Yun and Gong, Boying and Jiang, Tao and Xu, Ting and Huang, Haiyan},
  year = 2022,
  month = jul,
  number = {arXiv:2207.01813},
  eprint = {2207.01813},
  primaryclass = {q-bio.QM},
  publisher = {arXiv},
  doi = {10.48550/arXiv.2207.01813},
  urldate = {2025-12-19},
  abstract = {Recent years have seen substantial advances in the development of biofunctional materials using synthetic polymers. The growing problem of elusive sequence-functionality relations for most biomaterials has driven researchers to seek more effective tools and analysis methods. In this study, statistical models are used to study sequence features of the recently reported random heteropolymers (RHP), which transport protons across lipid bilayers selectively and rapidly like natural proton channels. We utilized the probabilistic graphical model framework and developed a generalized hidden semi-Markov model (GHSMM-RHP) to extract the function-determining sequence features, including the transmembrane segments within a chain and the sequence heterogeneity among different chains. We developed stochastic variational methods for efficient inference on parameter estimation and predictions, and empirically studied their computational performance from a comparative perspective on Bayesian (i.e., stochastic variational Bayes) versus frequentist (i.e., stochastic variational expectation-maximization) frameworks that have been studied separately before. The real data results agree well with the laboratory experiments, and suggest GHSMM-RHP's potential in predicting protein-like behavior at the polymer-chain level.},
  archiveprefix = {arXiv},
  langid = {english},
  keywords = {Quantitative Biology - Quantitative Methods,Statistics - Applications},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-12-19T09:17:14.594Z},
  file = {/home/louis/snap/zotero-snap/common/Zotero/storage/L2RJLBT2/Zhou et al. - 2022 - Stochastic Variational Methods in Generalized Hidden Semi-Markov Models to Characterize Functionalit.pdf}
}

% Zito, Cutello and Pavone (2023), Entropy 25(8): a machine-learning method for
% simulating gene expression and inferring gene regulatory networks.
@article{zitoMachineLearningApproach2023,
  author     = {Zito, Francesco and Cutello, Vincenzo and Pavone, Mario},
  title      = {A {{Machine Learning Approach}} to {{Simulate Gene Expression}} and {{Infer Gene Regulatory Networks}}},
  journal    = {Entropy},
  year       = 2023,
  month      = aug,
  volume     = {25},
  number     = {8},
  pages      = {1214},
  publisher  = {Multidisciplinary Digital Publishing Institute},
  issn       = {1099-4300},
  doi        = {10.3390/e25081214},
  urldate    = {2025-09-21},
  copyright  = {http://creativecommons.org/licenses/by/3.0/},
  langid     = {english},
  keywords   = {complex network,gene regulatory network,machine learning,metaheuristic,reverse engineering,time-series forecasting},
  abstract   = {The ability to simulate gene expression and infer gene regulatory networks has vast potential applications in various fields, including medicine, agriculture, and environmental science. In recent years, machine learning approaches to simulate gene expression and infer gene regulatory networks have gained significant attention as a promising area of research. By simulating gene expression, we can gain insights into the complex mechanisms that control gene expression and how they are affected by various environmental factors. This knowledge can be used to develop new treatments for genetic diseases, improve crop yields, and better understand the evolution of species. In this article, we address this issue by focusing on a novel method capable of simulating the gene expression regulation of a group of genes and their mutual interactions. Our framework enables us to simulate the regulation of gene expression in response to alterations or perturbations that can affect the expression of a gene. We use both artificial and real benchmarks to empirically evaluate the effectiveness of our methodology. Furthermore, we compare our method with existing ones to understand its advantages and disadvantages. We also present future ideas for improvement to enhance the effectiveness of our method. Overall, our approach has the potential to greatly improve the field of gene expression simulation and gene regulatory network inference, possibly leading to significant advancements in genetics.},
  annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-09-23T11:03:33.438Z},
  file       = {/home/louis/snap/zotero-snap/common/Zotero/storage/HM3462DA/Zito et al. - 2023 - A Machine Learning Approach to Simulate Gene Expression and Infer Gene Regulatory Networks.pdf}
}